def calculate_lower_mutual_information(layeroutput, PXs, PYs, unique_inverse_x, unique_inverse_y):
    """Calculate mutual information based on the lower bound."""
    noise_variance = 1e-3
    # Marginal entropy H(T), lower-bounded via the Bhattacharyya distance
    h_lower = kde.entropy_estimator_bd(layeroutput, noise_variance)
    # Conditional entropy H(T|X), under the assumption of Gaussian noise
    h_condition_on_data = kde.kde_condentropy(layeroutput, noise_variance)
    nats2bits = 1.0 / np.log(2)
    # Conditional entropy H(T|Y), averaged over the label distribution
    h_condition_on_label = 0
    for i in range(len(PYs)):
        hcond_lower = kde.entropy_estimator_bd(layeroutput[unique_inverse_y == i], noise_variance)
        h_condition_on_label += PYs[i] * hcond_lower
    local_IXT = nats2bits * (h_lower - h_condition_on_data)   # I(X;T) = H(T) - H(T|X)
    local_ITY = nats2bits * (h_lower - h_condition_on_label)  # I(Y;T) = H(T) - H(T|Y)
    return local_IXT, local_ITY
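# A hypothetical helper showing how the label bookkeeping these estimators
# expect (PYs, unique_inverse_y) can be built from a raw label vector with
# np.unique; the name build_label_stats is illustrative, not from this codebase.
def build_label_stats(labels):
    # unique_inverse_y[i] is the class index of sample i
    _, unique_inverse_y = np.unique(labels, return_inverse=True)
    # PYs is the empirical class distribution
    PYs = np.bincount(unique_inverse_y) / float(len(labels))
    return PYs, unique_inverse_y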
import numpy as np
import tensorflow as tf

import kde


def mutual_information_kde(X, Y, sig=0.01, lower=False):
    """KDE-based MI estimates; returns (I(X;T), I(Y;T)) in bits.

    X is an (N, d) activation matrix, Y an (N, num_labels) one-hot label
    matrix; `lower` selects the Bhattacharyya lower bound instead of the
    KL upper bound.
    """
    np.random.seed(None)
    # Perturb the activations with Gaussian noise and rectify
    data = np.abs(X + np.random.normal(0, sig, (len(X), len(X[0]))))
    # Empirical label probabilities and activations grouped by label
    pys1 = np.mean(Y, axis=0)
    data_given_y = [data[Y[:, i] == 1] for i in range(Y.shape[1])]
    # Conditional entropy H(T|Y)
    h_t_given_y = 0
    for i in range(len(pys1)):
        if not lower:
            h_t_given_y += pys1[i] * kde.entropy_estimator_kl(
                tf.convert_to_tensor(data_given_y[i]), sig)
        else:
            h_t_given_y += pys1[i] * kde.entropy_estimator_bd(
                tf.convert_to_tensor(data_given_y[i]), sig)
    # Marginal entropy H(T)
    datatf = tf.convert_to_tensor(data)
    if not lower:
        data_entropy = kde.entropy_estimator_kl(datatf, sig)
    else:
        data_entropy = kde.entropy_estimator_bd(datatf, sig)
    local_IXT = data_entropy - kde.kde_condentropy(data, sig)
    local_ITY = data_entropy - h_t_given_y
    sess = tf.Session()
    mi1, mi2 = sess.run([local_IXT, local_ITY])
    sess.close()
    # Convert nats to bits
    return mi1 / np.log(2), mi2 / np.log(2)
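# Example call under those assumptions (toy data; X_toy and Y_toy are
# illustrative names, and the values printed are rough estimates):
X_toy = np.random.randn(200, 10)
Y_toy = np.eye(2)[np.random.randint(0, 2, size=200)]  # one-hot labels
ixt, ity = mutual_information_kde(X_toy, Y_toy, sig=0.01)
print('I(X;T)=%0.3f bits, I(Y;T)=%0.3f bits' % (ixt, ity))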
def calculate_lower_mutual_information(layeroutput, PXs, PYs, unique_inverse_x, unique_inverse_y, bins):
    noise_variance = 1e-3
    # Compute marginal entropies
    # h_upper = kde.entropy_estimator_kl(layeroutput, noise_variance)
    h_lower = kde.entropy_estimator_bd(layeroutput, noise_variance)
    h_condition_on_data = kde.kde_condentropy(layeroutput, noise_variance)
    nats2bits = 1.0 / np.log(2)
    h_condition_on_label = 0
    for i in range(len(PYs)):
        hcond_lower = kde.entropy_estimator_bd(layeroutput[unique_inverse_y == i], noise_variance)
        h_condition_on_label += PYs[i] * hcond_lower
    local_IXT = nats2bits * (h_lower - h_condition_on_data)
    local_ITY = nats2bits * (h_lower - h_condition_on_label)
    # Optional evaluation of the TF tensors in a session, left disabled:
    # sess = tf.Session()
    # sess.run(tf.global_variables_initializer())
    # local_IXT, local_ITY = sess.run([local_IXT, local_ITY])
    # print('KDE I(X;T) is: ' + str(local_IXT), 'KDE I(Y;T) is: ' + str(local_ITY))
    return local_IXT, local_ITY
def calculate_mutual_information(layeroutput, PXs, PYs, unique_inverse_x, unique_inverse_y):
    """Calculate mutual information based on the upper bound."""
    # Variance of the noise added in the estimator, per Kolchinsky and Tracey,
    # Nonlinear Information Bottleneck, 2017, Eq. 10
    noise_variance = 1e-3
    # Marginal entropy H(T), upper-bounded via the KL divergence
    h_upper = kde.entropy_estimator_kl(layeroutput, noise_variance)
    # Conditional entropy H(T|X), under the assumption of Gaussian noise
    h_condition_on_data = kde.kde_condentropy(layeroutput, noise_variance)
    nats2bits = 1.0 / np.log(2)
    # Conditional entropy H(T|Y), averaged over the label distribution
    h_condition_on_label = 0
    for i in range(len(PYs)):
        hcond_upper = kde.entropy_estimator_kl(layeroutput[unique_inverse_y == i], noise_variance)
        h_condition_on_label += PYs[i] * hcond_upper
    local_IXT = nats2bits * (h_upper - h_condition_on_data)   # I(X;T) = H(T) - H(T|X)
    local_ITY = nats2bits * (h_upper - h_condition_on_label)  # I(Y;T) = H(T) - H(T|Y)
    return local_IXT, local_ITY
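# Under the additive-noise model T = f(X) + eps with eps ~ N(0, var*I),
# H(T|X) is just the entropy of the noise. A NumPy sketch of that closed
# form (kde.kde_condentropy is assumed to compute the same quantity;
# gaussian_condentropy is an illustrative name):
def gaussian_condentropy(dims, var):
    # Differential entropy of an isotropic Gaussian: (d/2) * log(2*pi*e*var)
    return (dims / 2.0) * (np.log(2 * np.pi * var) + 1.0)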
ARCH = '32-28-24-20-16-12-8-8'
DIR_TEMPLATE = '%%s_%s' % ARCH

# Functions to return upper and lower bounds on entropy of layer activity
noise_variance = 1e-3  # Added Gaussian noise variance
binsize = 0.07         # Size of bins for the binning method
Klayer_activity = K.placeholder(ndim=2)  # Keras placeholder
entropy_func_upper = K.function([Klayer_activity],
                                [kde.entropy_estimator_kl(Klayer_activity, noise_variance)])
entropy_func_lower = K.function([Klayer_activity],
                                [kde.entropy_estimator_bd(Klayer_activity, noise_variance)])

# Nats to bits conversion factor
nats2bits = 1.0 / np.log(2)

# Save indexes of test data for each of the output classes
saved_labelixs = {}
y = tst.y
Y = tst.Y
if FULL_MI:
    full = utils.construct_full_dataset(trn, tst)
    y = full.y
    Y = full.Y
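# saved_labelixs is presumably then filled per class, as in computeMI and
# plot_results below (a sketch mirroring those functions; NUM_LABELS depends
# on the dataset):
# for i in range(NUM_LABELS):
#     saved_labelixs[i] = (y == i)
# labelprobs = np.mean(Y, axis=0)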
import kde
import keras.backend as K
import numpy as np

Y_samples = K.placeholder(ndim=2)
noise_variance = 0.05
entropy_func_upper = K.function([Y_samples],
                                [kde.entropy_estimator_kl(Y_samples, noise_variance)])
entropy_func_lower = K.function([Y_samples],
                                [kde.entropy_estimator_bd(Y_samples, noise_variance)])

data = np.random.random(size=(1000, 20))  # N x dims
H_Y_given_X = kde.kde_condentropy(data, noise_variance)
H_Y_upper = entropy_func_upper([data])[0]
H_Y_lower = entropy_func_lower([data])[0]
print("Upper bound: %0.3f nats" % (H_Y_upper - H_Y_given_X))
print("Lower bound: %0.3f nats" % (H_Y_lower - H_Y_given_X))

# Alternative calculation, direct from distance matrices
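# A minimal NumPy sketch of that alternative, assuming the kde estimators
# follow Kolchinsky & Tracey, "Estimating Mixture Entropy with Pairwise
# Distances" (2017); pairwise_sq_dists and entropy_upper_np are illustrative
# names, not part of the kde module.
from scipy.special import logsumexp

def pairwise_sq_dists(X):
    # ||x_i - x_j||^2 for all pairs, via ||x_i||^2 + ||x_j||^2 - 2 x_i . x_j
    sq = np.sum(np.square(X), axis=1, keepdims=True)
    return sq + sq.T - 2.0 * X.dot(X.T)

def entropy_upper_np(X, var):
    # KL-based upper bound on the entropy of a mixture of N Gaussians with
    # covariance var*I centered on the rows of X
    N, dims = X.shape
    dists = pairwise_sq_dists(X) / (2.0 * var)
    normconst = (dims / 2.0) * np.log(2 * np.pi * var)
    lprobs = logsumexp(-dists, axis=1) - np.log(N) - normconst
    return dims / 2.0 - np.mean(lprobs)

# Should roughly agree with the Keras-based estimate above
print("NumPy upper bound: %0.3f nats" % (entropy_upper_np(data, noise_variance) - H_Y_given_X))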
def computeMI(id):
    # For both train and test; must correspond with the saving code
    train, test = utils.get_IB_data('2017_12_21_16_51_3_275766')
    FULL_MI = True
    # MI measure
    infoplane_measure = 'bin'
    DO_SAVE = True
    DO_LOWER = (infoplane_measure == 'lower')
    DO_BINNED = (infoplane_measure == 'bin')
    MAX_EPOCHS = 5000
    NUM_LABELS = 2
    COLORBAR_MAX_EPOCHS = 5000
    # Directory for loading saved data
    ARCH = '10-7-5-4-3'
    DIR_TEMPLATE = '%%s_%s' % ARCH
    noise_variance = 1e-3
    binsize = 0.07
    Klayer_activity = K.placeholder(ndim=2)
    entropy_func_upper = K.function([Klayer_activity],
                                    [kde.entropy_estimator_kl(Klayer_activity, noise_variance)])
    entropy_func_lower = K.function([Klayer_activity],
                                    [kde.entropy_estimator_bd(Klayer_activity, noise_variance)])
    # Nats to bits conversion
    nats2bits = 1.0 / np.log(2)
    # Indexes of test data for each of the output classes
    saved_labelixs = {}
    y = test.y
    Y = test.Y
    if FULL_MI:
        full = utils.construct_full_dataset(train, test)
        y = full.y
        Y = full.Y
    for i in range(NUM_LABELS):
        saved_labelixs[i] = (y == i)
    labelprobs = np.mean(Y, axis=0)
    # Layers to plot; None for all
    PLOT_LAYERS = None
    # Store results
    measures = OrderedDict()
    measures['tanh'] = {}
    measures['relu'] = {}
    for activation in measures.keys():
        cur_dir = 'rawdata/' + DIR_TEMPLATE % activation
        if not os.path.exists(cur_dir):
            print("Directory %s not found" % cur_dir)
            continue
        print("******* Loading %s ******" % cur_dir)
        for epochfile in sorted(os.listdir(cur_dir)):
            if not epochfile.startswith('epoch'):
                continue
            fname = cur_dir + '/' + epochfile
            with open(fname, 'rb') as f:
                d = cPickle.load(f)
            epoch = d['epoch']
            if epoch in measures[activation]:
                continue
            if epoch > MAX_EPOCHS:
                continue
            print("Measuring ", fname)
            num_layers = len(d['data']['activity_tst'])
            if PLOT_LAYERS is None:
                PLOT_LAYERS = list(range(num_layers))
            cepochdata = defaultdict(list)
            for lndx in range(num_layers):
                activity = d['data']['activity_tst'][lndx]
                # Compute marginal entropies
                h_upper = entropy_func_upper([activity])[0]
                if DO_LOWER:
                    h_lower = entropy_func_lower([activity])[0]
                # Layer activity given input: the entropy of the added Gaussian noise
                hM_given_X = kde.kde_condentropy(activity, noise_variance)
                # Conditional entropy of layer activity given output
                hM_given_Y_upper = 0
                for i in range(NUM_LABELS):
                    hcond_upper = entropy_func_upper([activity[saved_labelixs[i], :]])[0]
                    hM_given_Y_upper += labelprobs[i] * hcond_upper
                if DO_LOWER:
                    hM_given_Y_lower = 0
                    for i in range(NUM_LABELS):
                        hcond_lower = entropy_func_lower([activity[saved_labelixs[i], :]])[0]
                        hM_given_Y_lower += labelprobs[i] * hcond_lower
                cepochdata['MI_XM_upper'].append(nats2bits * (h_upper - hM_given_X))
                cepochdata['MI_YM_upper'].append(nats2bits * (h_upper - hM_given_Y_upper))
                cepochdata['H_M_upper'].append(nats2bits * h_upper)
                pstr = 'upper: MI(X;M)=%0.3f, MI(Y;M)=%0.3f' % (
                    cepochdata['MI_XM_upper'][-1], cepochdata['MI_YM_upper'][-1])
                if DO_LOWER:
                    cepochdata['MI_XM_lower'].append(nats2bits * (h_lower - hM_given_X))
                    cepochdata['MI_YM_lower'].append(nats2bits * (h_lower - hM_given_Y_lower))
                    cepochdata['H_M_lower'].append(nats2bits * h_lower)
                    pstr += ' | lower: MI(X;M)=%0.3f, MI(Y;M)=%0.3f' % (
                        cepochdata['MI_XM_lower'][-1], cepochdata['MI_YM_lower'][-1])
                if DO_BINNED:
                    binxm, binym = simplebinmi.bin_calc_information2(
                        saved_labelixs, activity, binsize)
                    cepochdata['MI_XM_bin'].append(nats2bits * binxm)
                    cepochdata['MI_YM_bin'].append(nats2bits * binym)
                    pstr += ' | bin: MI(X;M)=%0.3f, MI(Y;M)=%0.3f' % (
                        cepochdata['MI_XM_bin'][-1], cepochdata['MI_YM_bin'][-1])
                print('- Layer %d %s' % (lndx, pstr))
            measures[activation][epoch] = cepochdata
    with open("MI" + str(id), 'wb') as f:
        cPickle.dump(measures, f)
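# A hypothetical driver (assumes rawdata/<activation>_10-7-5-4-3/epoch* files
# produced by the corresponding training run already exist):
# computeMI(0)   # writes the pickled measures to a file named "MI0"

# For intuition, a rough NumPy sketch of what a binning-based I(X;M) estimate
# does; the actual simplebinmi.bin_calc_information2 may differ in details,
# and binned_entropy is an illustrative name:
def binned_entropy(activity, binsize):
    # Discretize each unit's activity into fixed-width bins, then measure the
    # entropy of the resulting discrete codes. Since the binned code is a
    # deterministic function of the input, this entropy equals I(X; M_binned).
    digitized = np.floor(activity / binsize).astype(int)
    _, counts = np.unique(digitized, axis=0, return_counts=True)
    p = counts / float(counts.sum())
    return -np.sum(p * np.log(p))  # in nats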
r = model.fit(x=trn.X, y=trn.Y,
              verbose=2,
              batch_size=args.batch_size,
              epochs=args.num_epochs,
              initial_epoch=args.start - 1,
              validation_data=(tst.X, tst.Y),
              callbacks=[reporter, saver, scheduler, visualizer])

# Which measures to plot
infoplane_measures = ['bin', 'upper', 'lower']

# Functions to return upper and lower bounds on entropy of layer activity
noise_variance = 1e-1  # Added Gaussian noise variance
Klayer_activity = K.placeholder(ndim=2)  # Keras placeholder
entropy_func_upper = K.function([Klayer_activity],
                                [kde.entropy_estimator_kl(Klayer_activity, noise_variance)])
entropy_func_lower = K.function([Klayer_activity],
                                [kde.entropy_estimator_bd(Klayer_activity, noise_variance)])

# Nats to bits conversion factor
nats2bits = 1.0 / np.log(2)

# Save indexes of test data for each of the output classes
saved_labelixs = {}
for i in range(10):
    saved_labelixs[i] = tst.y == i
labelprobs = np.mean(tst.Y, axis=0)

PLOT_LAYERS = None  # Which layers to plot; if None, all saved layers are plotted

# Data structure used to store results
measures = OrderedDict()
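# A small illustrative helper (hypothetical name cond_entropy_given_label)
# capturing the H(M|Y) = sum_y p(y) * H(M | Y=y) pattern these scripts repeat:
def cond_entropy_given_label(entropy_func, activity, saved_labelixs, labelprobs):
    hM_given_Y = 0.
    for i in range(len(labelprobs)):
        # Entropy of the layer activity restricted to samples of class i
        hcond = entropy_func([activity[saved_labelixs[i], :]])[0]
        hM_given_Y += labelprobs[i] * hcond
    return hM_given_Y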
def plot_results(tst, cur_dir, model_name):
    infoplane_measure = 'bin'
    DO_SAVE = True  # Whether to save plots or just show them
    DO_LOWER = (infoplane_measure == 'lower')  # Whether to also compute lower bounds
    DO_BINNED = (infoplane_measure == 'bin')   # Whether to compute MI estimates based on binning
    MAX_EPOCHS = 1000  # Max number of epochs for which to compute the mutual information measure
    COLORBAR_MAX_EPOCHS = 1000
    noise_variance = 1e-1  # Added Gaussian noise variance
    Klayer_activity = K.placeholder(ndim=2)  # Keras placeholder
    entropy_func_upper = K.function([Klayer_activity],
                                    [kde.entropy_estimator_kl(Klayer_activity, noise_variance)])
    entropy_func_lower = K.function([Klayer_activity],
                                    [kde.entropy_estimator_bd(Klayer_activity, noise_variance)])
    # Nats to bits conversion factor
    nats2bits = 1.0 / np.log(2)
    # Save indexes of test data for each of the output classes
    saved_labelixs = {}
    for i in range(150):
        saved_labelixs[i] = tst.y == i
    labelprobs = np.mean(tst.Y, axis=0)
    PLOT_LAYERS = None  # Which layers to plot; if None, all saved layers are plotted
    # Data structure used to store results
    measures = OrderedDict()
    measures['relu'] = {}
    for epochfile in sorted(os.listdir(cur_dir)):
        if not epochfile.startswith('epoch'):
            continue
        fname = cur_dir + "/" + epochfile
        with open(fname, 'rb') as f:
            d = pickle.load(f)
        epoch = d['epoch']
        num_layers = len(d['data']['activity_tst'])
        if PLOT_LAYERS is None:
            PLOT_LAYERS = list(range(num_layers))
        cepochdata = defaultdict(list)
        for lndx in range(num_layers):
            activity = d['data']['activity_tst'][lndx]
            if len(activity.shape) == 3:
                activity = activity[:, 1]
            # Compute marginal entropies
            h_upper = entropy_func_upper([activity])[0]
            if DO_LOWER:
                h_lower = entropy_func_lower([activity])[0]
            # Layer activity given input: simply the entropy of the Gaussian noise
            hM_given_X = kde.kde_condentropy(activity, noise_variance)
            # Compute conditional entropies of layer activity given output
            hM_given_Y_upper = 0.
            for i in range(len(labelprobs)):
                hcond_upper = entropy_func_upper([activity[saved_labelixs[i], :]])[0]
                hM_given_Y_upper += labelprobs[i] * hcond_upper
            if DO_LOWER:
                hM_given_Y_lower = 0.
                for i in range(len(labelprobs)):
                    hcond_lower = entropy_func_lower([activity[saved_labelixs[i], :]])[0]
                    hM_given_Y_lower += labelprobs[i] * hcond_lower
            cepochdata['MI_XM_upper'].append(nats2bits * (h_upper - hM_given_X))
            cepochdata['MI_YM_upper'].append(nats2bits * (h_upper - hM_given_Y_upper))
            cepochdata['H_M_upper'].append(nats2bits * h_upper)
            pstr = 'upper: MI(X;M)=%0.3f, MI(Y;M)=%0.3f' % (
                cepochdata['MI_XM_upper'][-1], cepochdata['MI_YM_upper'][-1])
            if DO_LOWER:  # Compute lower bounds
                cepochdata['MI_XM_lower'].append(nats2bits * (h_lower - hM_given_X))
                cepochdata['MI_YM_lower'].append(nats2bits * (h_lower - hM_given_Y_lower))
                cepochdata['H_M_lower'].append(nats2bits * h_lower)
                pstr += ' | lower: MI(X;M)=%0.3f, MI(Y;M)=%0.3f' % (
                    cepochdata['MI_XM_lower'][-1], cepochdata['MI_YM_lower'][-1])
            if DO_BINNED:  # Compute binned estimates
                binxm, binym = simplebinmi.bin_calc_information2(
                    saved_labelixs, activity, 0.5)
                cepochdata['MI_XM_bin'].append(nats2bits * binxm)
                cepochdata['MI_YM_bin'].append(nats2bits * binym)
                pstr += ' | bin: MI(X;M)=%0.3f, MI(Y;M)=%0.3f' % (
                    cepochdata['MI_XM_bin'][-1], cepochdata['MI_YM_bin'][-1])
        measures['relu'][epoch] = cepochdata

    sns.set_style('darkgrid')
    max_epoch = max((max(vals.keys()) if len(vals) else 0) for vals in measures.values())
    sm = plt.cm.ScalarMappable(cmap='gnuplot',
                               norm=plt.Normalize(vmin=0, vmax=COLORBAR_MAX_EPOCHS))
    sm._A = []
    fig = plt.figure(figsize=(10, 5))
    for actndx, (activation, vals) in enumerate(measures.items()):
        epochs = sorted(vals.keys())
        if not len(epochs):
            continue
        plt.subplot(1, 2, actndx + 1)
        for epoch in epochs:
            c = sm.to_rgba(epoch)
            xmvals = np.array(vals[epoch]['MI_XM_' + infoplane_measure])[PLOT_LAYERS]
            ymvals = np.array(vals[epoch]['MI_YM_' + infoplane_measure])[PLOT_LAYERS]
            plt.plot(xmvals, ymvals, c=c, alpha=0.1, zorder=1)
            plt.scatter(xmvals, ymvals, s=20,
                        facecolors=[c for _ in PLOT_LAYERS],
                        edgecolor='none', zorder=2)
        plt.ylim([0, 3.5])
        plt.xlim([0, 14])
        plt.xlabel('I(X;M)')
        plt.ylabel('I(Y;M)')
        plt.title(activation)
    cbaxes = fig.add_axes([1.0, 0.125, 0.03, 0.8])
    plt.colorbar(sm, label='Epoch', cax=cbaxes)
    plt.tight_layout()
    if DO_SAVE:
        plt.savefig('plots/' + model_name + "_infoplane.png", bbox_inches='tight')
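# Hypothetical call (directory and model name are illustrative and must match
# the layout produced by the saving code):
# plot_results(tst, 'rawdata/relu_10-7-5-4-3', 'relu_model')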