import numpy as np
import tensorflow as tf
import kde  # Kolchinsky-Tracey pairwise-distance entropy estimators


def mutual_information_kde(X, Y, sig=0.01, lower=False):
    """Estimate (I(X;T), I(Y;T)) in bits for layer activity X with one-hot
    labels Y, using the Kolchinsky-Tracey KDE bounds (KL-based upper bound
    by default; Bhattacharyya-based lower bound if `lower` is True)."""
    X = np.asarray(X, dtype=np.float64)
    pys1 = np.mean(Y, axis=0)  # empirical class probabilities
    data_given_y = [X[Y[:, i] == 1] for i in range(Y.shape[1])]
    # Conditional entropy H(T|Y) = sum_y p(y) * H(T | Y=y)
    h_t_given_y = 0.0
    for i in range(len(pys1)):
        if not lower:
            h_t_given_y += pys1[i] * kde.entropy_estimator_kl(
                tf.convert_to_tensor(data_given_y[i]), sig)
        else:
            h_t_given_y += pys1[i] * kde.entropy_estimator_bd(
                tf.convert_to_tensor(data_given_y[i]), sig)
    # Marginal entropy H(T)
    datatf = tf.convert_to_tensor(X)
    if not lower:
        data_entropy = kde.entropy_estimator_kl(datatf, sig)
    else:
        data_entropy = kde.entropy_estimator_bd(datatf, sig)
    # I(X;T) = H(T) - H(T|X) and I(Y;T) = H(T) - H(T|Y)
    local_IXT = data_entropy - kde.kde_condentropy(X, sig)
    local_ITY = data_entropy - h_t_given_y
    with tf.Session() as sess:
        mi_xt, mi_ty = sess.run([local_IXT, local_ITY])
    # Convert from nats to bits
    return mi_xt / np.log(2), mi_ty / np.log(2)
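# A minimal usage sketch for the function above, on toy data; the shapes and
# the one-hot label encoding here are illustrative assumptions, not part of
# the original code.
if __name__ == '__main__':
    X_act = np.random.randn(1000, 10)                     # layer activity, N x d
    Y_onehot = np.eye(2)[np.random.randint(0, 2, 1000)]   # one-hot labels, N x 2
    mi_xt, mi_ty = mutual_information_kde(X_act, Y_onehot, sig=0.01)
    print('I(X;T)=%0.3f bits, I(Y;T)=%0.3f bits' % (mi_xt, mi_ty))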
def calculate_mutual_information(layeroutput, PXs, PYs, unique_inverse_x,
                                 unique_inverse_y, bins):
    noise_variance = 1e-3
    # Compute marginal entropies
    h_upper = kde.entropy_estimator_kl(layeroutput, noise_variance)
    h_lower = kde.entropy_estimator_bd(layeroutput, noise_variance)
    h_condition_on_data = kde.kde_condentropy(layeroutput, noise_variance)
    nats2bits = 1.0 / np.log(2)
    h_condition_on_label = 0
    for i in range(len(PYs)):
        hcond_upper = kde.entropy_estimator_kl(
            layeroutput[unique_inverse_y == i], noise_variance)
        h_condition_on_label += PYs[i] * hcond_upper
    local_IXT = nats2bits * (h_upper - h_condition_on_data)
    local_ITY = nats2bits * (h_upper - h_condition_on_label)
    # In TF1 graph mode these are symbolic tensors; evaluate with e.g.
    #   with tf.Session() as sess:
    #       local_IXT, local_ITY = sess.run([local_IXT, local_ITY])
    #   print('KDE I(X;T):', local_IXT, 'KDE I(Y;T):', local_ITY)
    return local_IXT,local_ITY
Example #3
def calculate_mutual_information(layeroutput, PXs, PYs, unique_inverse_x,
                                 unique_inverse_y):
    "Calculate mutual information based on uppper bound"
    noise_variance = 1e-3  # The variance of noise added in estimator according to Kolchinsky and Tracey, Nonlinear Information Bottleneck, 2017. Eq. 10
    h_upper = kde.entropy_estimator_kl(
        layeroutput,
        noise_variance)  # Calculate marginal entropy based on KL divergence
    h_lower = kde.entropy_estimator_bd(
        layeroutput, noise_variance
    )  # Marginal entropy based on Bhattacharyya distance (kept for reference; unused below)
    h_condition_on_data = kde.kde_condentropy(
        layeroutput, noise_variance
    )  # Conditional entropy of T given X, under the Gaussian noise assumption
    nats2bits = 1.0 / np.log(2)
    h_condition_on_label = 0
    for i in range(len(PYs)):  # Calculate conditional entropy of T given Y
        hcond_upper = kde.entropy_estimator_kl(
            layeroutput[unique_inverse_y == i], noise_variance)
        h_condition_on_label += PYs[i] * hcond_upper
    local_IXT = nats2bits * (h_upper - h_condition_on_data
                             )  # I(X;T)=H(T)-H(T|X)
    local_ITY = nats2bits * (h_upper - h_condition_on_label
                             )  # I(Y;T)=H(T)-H(T|Y)
    return local_IXT, local_ITY
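# For reference, a self-contained NumPy sketch of the quantities the kde module
# is assumed to compute, following Kolchinsky & Tracey, "Estimating Mixture
# Entropy with Pairwise Distances" (2017). `entropy_upper_kl_np` is the KL-based
# upper bound on the entropy (in nats) of a Gaussian mixture centered on the
# samples; `condentropy_np` is the closed-form entropy of the added Gaussian
# noise, (d/2) * ln(2*pi*e*var). This is a sketch, not the repo's exact code.
import numpy as np
from scipy.special import logsumexp

def entropy_upper_kl_np(x, var):
    n, d = x.shape
    # Pairwise squared distances between samples
    dists = np.sum((x[:, None, :] - x[None, :, :]) ** 2, axis=-1)
    # Log of the mixture density at each sample point
    lprobs = logsumexp(-dists / (2 * var), axis=1) - np.log(n) \
             - (d / 2.0) * np.log(2 * np.pi * var)
    return d / 2.0 - np.mean(lprobs)

def condentropy_np(x, var):
    # Entropy of the additive Gaussian noise, independent of x's values
    d = x.shape[1]
    return (d / 2.0) * (np.log(2 * np.pi * var) + 1)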
# Directories from which to load saved layer activity
# ARCH = '1024-20-20-20'
ARCH = '10-7-5-4-3'
#ARCH = '20-20-20-20-20-20'
#ARCH = '32-28-24-20-16-12'
#ARCH = '32-28-24-20-16-12-8-8'
DIR_TEMPLATE = '%%s_%s' % ARCH

# Functions to return upper and lower bounds on entropy of layer activity
noise_variance = 1e-3  # Added Gaussian noise variance
binsize = 0.07  # size of bins for binning method
Klayer_activity = K.placeholder(ndim=2)  # Keras placeholder
entropy_func_upper = K.function([
    Klayer_activity,
], [
    kde.entropy_estimator_kl(Klayer_activity, noise_variance),
])
entropy_func_lower = K.function([
    Klayer_activity,
], [
    kde.entropy_estimator_bd(Klayer_activity, noise_variance),
])

# nats to bits conversion factor
nats2bits = 1.0 / np.log(2)

# Save indexes of test data for each of the output classes
saved_labelixs = {}

y = tst.y
Y = tst.Y
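# In the companion examples below, the per-class index masks are then built as:
#     for i in range(NUM_LABELS):
#         saved_labelixs[i] = (y == i)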
Example #5
# Simple example of how to estimate MI between X and Y, where Y = f(X) + Noise(0, noise_variance)
from __future__ import print_function
import kde
import keras.backend as K
import numpy as np

Y_samples = K.placeholder(ndim=2)

noise_variance = 0.05
entropy_func_upper = K.function([
    Y_samples,
], [
    kde.entropy_estimator_kl(Y_samples, noise_variance),
])
entropy_func_lower = K.function([
    Y_samples,
], [
    kde.entropy_estimator_bd(Y_samples, noise_variance),
])

data = np.random.random(size=(1000, 20))  # N x dims
H_Y_given_X = kde.kde_condentropy(data, noise_variance)
H_Y_upper = entropy_func_upper([
    data,
])[0]
H_Y_lower = entropy_func_lower([
    data,
])[0]

print("Upper bound: %0.3f nats" % (H_Y_upper - H_Y_given_X))
print("Lower bound: %0.3f nats" % (H_Y_lower - H_Y_given_X))
Example #6
from __future__ import print_function
import os
from collections import OrderedDict, defaultdict
from six.moves import cPickle
import numpy as np
import keras.backend as K
import kde
import simplebinmi
import utils


def computeMI(run_id):
    train, test = utils.get_IB_data('2017_12_21_16_51_3_275766')

    # Whether to compute MI on the full (train + test) dataset;
    # must correspond with the setting used in the saving code
    FULL_MI = True

    # MI Measure
    infoplane_measure = 'bin'

    DO_SAVE = True
    DO_LOWER = (infoplane_measure == 'lower')
    DO_BINNED = (infoplane_measure == 'bin')

    MAX_EPOCHS = 5000
    NUM_LABELS = 2

    COLORBAR_MAX_EPOCHS = 5000

    # Directory from which to load saved data
    ARCH = '10-7-5-4-3'
    DIR_TEMPLATE = '%%s_%s' % ARCH

    noise_variance = 1e-3
    binsize = 0.07
    Klayer_activity = K.placeholder(ndim=2)
    entropy_func_upper = K.function([
        Klayer_activity,
    ], [
        kde.entropy_estimator_kl(Klayer_activity, noise_variance),
    ])
    entropy_func_lower = K.function([
        Klayer_activity,
    ], [
        kde.entropy_estimator_bd(Klayer_activity, noise_variance),
    ])

    # Nats to bits conversion
    nats2bits = 1.0 / np.log(2)

    # Indexes of test data for each of the output classes
    saved_labelixs = {}

    y = test.y
    Y = test.Y
    if FULL_MI:
        full = utils.construct_full_dataset(train, test)
        y = full.y
        Y = full.Y

    for i in range(NUM_LABELS):
        saved_labelixs[i] = (y == i)

    labelprobs = np.mean(Y, axis=0)

    # Layers to plot, None for all
    PLOT_LAYERS = None

    # Store Results
    measures = OrderedDict()
    measures['tanh'] = {}
    measures['relu'] = {}

    for activation in measures.keys():
        cur_dir = 'rawdata/' + DIR_TEMPLATE % activation
        if not os.path.exists(cur_dir):
            print("Directory %s not found" % cur_dir)
            continue

        print("******* Loading %s ******" % cur_dir)
        for epochfile in sorted(os.listdir(cur_dir)):
            if not epochfile.startswith('epoch'):
                continue
            fname = cur_dir + '/' + epochfile
            with open(fname, 'rb') as f:
                d = cPickle.load(f)

            epoch = d['epoch']
            if epoch in measures[activation]:
                continue

            if epoch > MAX_EPOCHS:
                continue

            print("Measureing ", fname)

            num_layers = len(d['data']['activity_tst'])
            if PLOT_LAYERS is None:
                PLOT_LAYERS = []
                for lndx in range(num_layers):
                    PLOT_LAYERS.append(lndx)

            cepochdata = defaultdict(list)
            for lndx in range(num_layers):
                activity = d['data']['activity_tst'][lndx]

                h_upper = entropy_func_upper([
                    activity,
                ])[0]
                if DO_LOWER:
                    h_lower = entropy_func_lower([
                        activity,
                    ])[0]

                hM_given_X = kde.kde_condentropy(activity, noise_variance)
                hM_given_Y_upper = 0
                for i in range(NUM_LABELS):
                    hcond_upper = entropy_func_upper([
                        activity[saved_labelixs[i], :],
                    ])[0]
                    hM_given_Y_upper += labelprobs[i] * hcond_upper

                if DO_LOWER:
                    hM_given_Y_lower = 0
                    for i in range(NUM_LABELS):
                        hcond_lower = entropy_func_lower([
                            activity[saved_labelixs[i], :],
                        ])[0]
                        hM_given_Y_lower += labelprobs[i] * hcond_lower

                cepochdata['MI_XM_upper'].append(nats2bits *
                                                 (h_upper - hM_given_X))
                cepochdata['MI_YM_upper'].append(nats2bits *
                                                 (h_upper - hM_given_Y_upper))
                cepochdata['H_M_upper'].append(nats2bits * h_upper)

                pstr = 'upper: MI(X;M)=%0.3f, MI(Y;M)=%0.3f' % (
                    cepochdata['MI_XM_upper'][-1],
                    cepochdata['MI_YM_upper'][-1])
                if DO_LOWER:
                    cepochdata['MI_XM_lower'].append(nats2bits *
                                                     (h_lower - hM_given_X))
                    cepochdata['MI_YM_lower'].append(
                        nats2bits * (h_lower - hM_given_Y_lower))
                    cepochdata['H_M_lower'].append(nats2bits * h_lower)
                    pstr += ' | lower: MI(X;M)=%0.3f, MI(Y;M)=%0.3f' % (
                        cepochdata['MI_XM_lower'][-1],
                        cepochdata['MI_YM_lower'][-1])

                if DO_BINNED:
                    binxm, binym = simplebinmi.bin_calc_information2(
                        saved_labelixs, activity, binsize)
                    cepochdata['MI_XM_bin'].append(nats2bits * binxm)
                    cepochdata['MI_YM_bin'].append(nats2bits * binym)
                    pstr += ' | bin: MI(X;M)=%0.3f, MI(Y;M)=%0.3f' % (
                        cepochdata['MI_XM_bin'][-1],
                        cepochdata['MI_YM_bin'][-1])
                print('- Layer %d %s' % (lndx, pstr))
            measures[activation][epoch] = cepochdata
    with open("MI" + str(id), 'wb') as f:
        cPickle.dump(measures, f)
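# A minimal sketch of reading the dumped results back; 'MI' + run_id matches
# the filename written above (the concrete run id is up to the caller).
def load_MI(run_id):
    with open('MI' + str(run_id), 'rb') as f:
        return cPickle.load(f)
# e.g. measures = load_MI(0); sorted(measures['relu'].keys()) lists measured epochs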
Example #7
if not args.plot:
    r = model.fit(x=trn.X, y=trn.Y, 
                  verbose    = 2, 
                  batch_size = args.batch_size,
                  epochs     = args.num_epochs,
                  initial_epoch = args.start-1,
                  validation_data=(tst.X, tst.Y),
                  callbacks  = [reporter, saver, scheduler, visualizer])

# Which measure to plot
infoplane_measures = ['bin', 'upper', 'lower']

# Functions to return upper and lower bounds on entropy of layer activity
noise_variance = 1e-1                    # Added Gaussian noise variance
Klayer_activity = K.placeholder(ndim=2)  # Keras placeholder 
entropy_func_upper = K.function([Klayer_activity,], [kde.entropy_estimator_kl(Klayer_activity, noise_variance),])
entropy_func_lower = K.function([Klayer_activity,], [kde.entropy_estimator_bd(Klayer_activity, noise_variance),])

# nats to bits conversion factor
nats2bits = 1.0/np.log(2) 

# Save indexes of test data for each of the output classes
saved_labelixs = {}
for i in range(10):
    saved_labelixs[i] = tst.y == i

labelprobs = np.mean(tst.Y, axis=0)

PLOT_LAYERS    = None     # Which layers to plot.  If None, all saved layers are plotted 

# Data structure used to store results
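# In the companion examples, the results structure that follows is an
# OrderedDict keyed by activation, e.g.:
#     measures = OrderedDict()
#     measures['relu'] = {}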
Example #8
import os
import pickle
from collections import OrderedDict, defaultdict
import numpy as np
import keras.backend as K
import matplotlib.pyplot as plt
import seaborn as sns
import kde
import simplebinmi


def plot_results(tst, cur_dir, model_name):
    infoplane_measure = 'bin'

    DO_SAVE = True  # Whether to save plots or just show them
    DO_LOWER = (infoplane_measure == 'lower'
                )  # Whether to compute lower bounds also
    DO_BINNED = (infoplane_measure == 'bin'
                 )  # Whether to compute MI estimates based on binning

    MAX_EPOCHS = 1000  # Max number of epochs for which to compute mutual information
    COLORBAR_MAX_EPOCHS = 1000

    noise_variance = 1e-1  # Added Gaussian noise variance
    Klayer_activity = K.placeholder(ndim=2)  # Keras placeholder
    entropy_func_upper = K.function([
        Klayer_activity,
    ], [
        kde.entropy_estimator_kl(Klayer_activity, noise_variance),
    ])
    entropy_func_lower = K.function([
        Klayer_activity,
    ], [
        kde.entropy_estimator_bd(Klayer_activity, noise_variance),
    ])

    # nats to bits conversion factor
    nats2bits = 1.0 / np.log(2)

    # Save indexes of test data for each of the output classes
    saved_labelixs = {}
    for i in range(150):
        saved_labelixs[i] = tst.y == i

    labelprobs = np.mean(tst.Y, axis=0)

    PLOT_LAYERS = None  # Which layers to plot.  If None, all saved layers are plotted

    # Data structure used to store results
    measures = OrderedDict()
    measures['relu'] = {}

    for epochfile in sorted(os.listdir(cur_dir)):
        if not epochfile.startswith('epoch'):
            continue

        fname = cur_dir + "/" + epochfile
        with open(fname, 'rb') as f:
            d = pickle.load(f)

        epoch = d['epoch']

        num_layers = len(d['data']['activity_tst'])
        if PLOT_LAYERS is None:
            PLOT_LAYERS = []
            for lndx in range(num_layers):
                PLOT_LAYERS.append(lndx)

        cepochdata = defaultdict(list)
        for lndx in range(num_layers):
            activity = d['data']['activity_tst'][lndx]
            if len(activity.shape) == 3:
                activity = activity[:, 1]

            # Compute marginal entropies
            h_upper = entropy_func_upper([
                activity,
            ])[0]
            if DO_LOWER:
                h_lower = entropy_func_lower([
                    activity,
                ])[0]

            # Layer activity given input. This is simply the entropy of the Gaussian noise
            hM_given_X = kde.kde_condentropy(activity, noise_variance)

            # Compute conditional entropies of layer activity given output
            hM_given_Y_upper = 0.
            for i in range(len(labelprobs)):
                hcond_upper = entropy_func_upper([
                    activity[saved_labelixs[i], :],
                ])[0]
                hM_given_Y_upper += labelprobs[i] * hcond_upper

            if DO_LOWER:
                hM_given_Y_lower = 0.
                for i in range(len(labelprobs)):
                    hcond_lower = entropy_func_lower([
                        activity[saved_labelixs[i], :],
                    ])[0]
                    hM_given_Y_lower += labelprobs[i] * hcond_lower

            cepochdata['MI_XM_upper'].append(nats2bits *
                                             (h_upper - hM_given_X))
            cepochdata['MI_YM_upper'].append(nats2bits *
                                             (h_upper - hM_given_Y_upper))
            cepochdata['H_M_upper'].append(nats2bits * h_upper)

            pstr = 'upper: MI(X;M)=%0.3f, MI(Y;M)=%0.3f' % (
                cepochdata['MI_XM_upper'][-1], cepochdata['MI_YM_upper'][-1])
            if DO_LOWER:  # Compute lower bounds
                cepochdata['MI_XM_lower'].append(nats2bits *
                                                 (h_lower - hM_given_X))
                cepochdata['MI_YM_lower'].append(nats2bits *
                                                 (h_lower - hM_given_Y_lower))
                cepochdata['H_M_lower'].append(nats2bits * h_lower)
                pstr += ' | lower: MI(X;M)=%0.3f, MI(Y;M)=%0.3f' % (
                    cepochdata['MI_XM_lower'][-1],
                    cepochdata['MI_YM_lower'][-1])

            if DO_BINNED:  # Compute binned estimates
                binxm, binym = simplebinmi.bin_calc_information2(
                    saved_labelixs, activity, 0.5)
                cepochdata['MI_XM_bin'].append(nats2bits * binxm)
                cepochdata['MI_YM_bin'].append(nats2bits * binym)
                pstr += ' | bin: MI(X;M)=%0.3f, MI(Y;M)=%0.3f' % (
                    cepochdata['MI_XM_bin'][-1], cepochdata['MI_YM_bin'][-1])

        measures['relu'][epoch] = cepochdata

    sns.set_style('darkgrid')

    max_epoch = max(
        (max(vals.keys()) if len(vals) else 0) for vals in measures.values())
    sm = plt.cm.ScalarMappable(cmap='gnuplot',
                               norm=plt.Normalize(vmin=0,
                                                  vmax=COLORBAR_MAX_EPOCHS))
    sm._A = []

    fig = plt.figure(figsize=(10, 5))
    for actndx, (activation, vals) in enumerate(measures.items()):
        epochs = sorted(vals.keys())
        if not len(epochs):
            continue
        plt.subplot(1, 2, actndx + 1)
        for epoch in epochs:
            c = sm.to_rgba(epoch)
            xmvals = np.array(vals[epoch]['MI_XM_' +
                                          infoplane_measure])[PLOT_LAYERS]
            ymvals = np.array(vals[epoch]['MI_YM_' +
                                          infoplane_measure])[PLOT_LAYERS]

            plt.plot(xmvals, ymvals, c=c, alpha=0.1, zorder=1)
            plt.scatter(xmvals,
                        ymvals,
                        s=20,
                        facecolors=[c for _ in PLOT_LAYERS],
                        edgecolor='none',
                        zorder=2)

        plt.ylim([0, 3.5])
        plt.xlim([0, 14])
        plt.xlabel('I(X;M)')
        plt.ylabel('I(Y;M)')
        plt.title(activation)

    cbaxes = fig.add_axes([1.0, 0.125, 0.03, 0.8])
    plt.colorbar(sm, label='Epoch', cax=cbaxes)
    plt.tight_layout()

    if DO_SAVE:
        plt.savefig('plots/' + model_name + "_infoplane.png",
                    bbox_inches='tight')
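# For reference, a minimal NumPy sketch of the binned estimator that
# simplebinmi.bin_calc_information2 is assumed to implement (a sketch, not the
# repo's exact code): discretize the activity into bins of width `binsize`;
# the binned layer M is then a deterministic function of X, so MI(X;M) ~= H(M),
# and MI(Y;M) = H(M) - H(M|Y). Returns nats.
import numpy as np

def _discrete_entropy(rows):
    # Entropy (nats) of the empirical distribution over unique rows
    _, counts = np.unique(rows, axis=0, return_counts=True)
    p = counts / counts.sum()
    return -np.sum(p * np.log(p))

def binned_mi_sketch(saved_labelixs, activity, binsize):
    digitized = np.floor(activity / binsize).astype(int)
    h_m = _discrete_entropy(digitized)
    h_m_given_y = 0.0
    for ixs in saved_labelixs.values():
        # ixs is a boolean mask for one class; ixs.mean() is its probability
        h_m_given_y += ixs.mean() * _discrete_entropy(digitized[ixs, :])
    return h_m, h_m - h_m_given_y   # (MI(X;M), MI(Y;M))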