def test_new_interaction_information(filepath_cache, dataset, train_y, binsize=4):
    import numpy as np
    # mi_3d/mi_4d are assumed to live alongside mi in information_discrete
    from information_discrete import mi, mi_3d, mi_4d
    from entropy_estimators import cmidd

    dataset = load_dataset(filepath_cache, dataset, binsize)

    # ind_var34;saldo_medio_var5_ult3;target 0.0149167532518
    a = mi(dataset["ind_var34"].values, dataset["saldo_medio_var5_ult3"].values)
    b = mi_3d(("ind_var34", dataset["ind_var34"].values),
              ("saldo_medio_var5_ult3", dataset["saldo_medio_var5_ult3"].values),
              ("target", train_y.values))

    # Unique values of each variable (computed but not used below)
    distribution = {}
    x = np.unique(dataset["ind_var34"].values)
    y = np.unique(dataset["saldo_medio_var5_ult3"].values)
    z = np.unique(train_y.values)

    # Conditional mutual information I(ind_var34; saldo_medio_var5_ult3 | target)
    c = cmidd(dataset["ind_var34"].values,
              dataset["saldo_medio_var5_ult3"].values,
              train_y.values)

    # c - a = I(X;Y|Z) - I(X;Y) is the interaction information of the triple
    print(c, a, b, c - a)
    print(mi_4d(("var3", dataset["var3"].values),
                ("ind_var34", dataset["ind_var34"].values),
                ("saldo_medio_var5_ult3", dataset["saldo_medio_var5_ult3"].values),
                ("target", train_y.values)))
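# The quantity c - a printed above is the interaction information
# I(X; Y; target) = I(X; Y | target) - I(X; Y). Since information_discrete's
# mi implementation is not shown here, the sketch below recomputes both terms
# with a plain plug-in (histogram) estimator on a toy XOR triple; plugin_mi
# and plugin_cmi are hypothetical helpers, not part of this repo.
from collections import Counter
import numpy as np


def plugin_mi(x, y):
    """Plug-in estimate of I(X;Y) in bits from two discrete sequences."""
    n = len(x)
    px, py, pxy = Counter(x), Counter(y), Counter(zip(x, y))
    return sum((c / n) * np.log2((c / n) / ((px[a] / n) * (py[b] / n)))
               for (a, b), c in pxy.items())


def plugin_cmi(x, y, z):
    """Plug-in estimate of I(X;Y|Z): average I(X;Y) within each stratum of Z."""
    n = len(z)
    total = 0.0
    for zv, cnt in Counter(z).items():
        idx = [i for i in range(n) if z[i] == zv]
        total += (cnt / n) * plugin_mi([x[i] for i in idx], [y[i] for i in idx])
    return total


xs = [0, 0, 0, 0, 1, 1, 1, 1]
ys = [0, 1, 0, 1, 0, 1, 0, 1]
zs = [a ^ b for a, b in zip(xs, ys)]  # z = x xor y: pure synergy
print(plugin_cmi(xs, ys, zs) - plugin_mi(xs, ys))  # interaction information = 1.0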
    # Tail of the loop over sample sizes: summarize the bootstrap MI estimates
    tempent.sort()
    tempmean = np.mean(tempent)
    ent.append(tempmean)
    err.append((tempmean - tempent[samplo], tempent[samphi] - tempmean))

print('samples used', Ntry)
print('estimated MI', ent)
print('95% conf int.\n', err)

## DISCRETE ESTIMATORS

print("\n\nTest of the discrete entropy estimators\n")
print("For z = y xor x, with x, y uniform random binary, we should get "
      "H(x) = H(y) = H(z) = 1, H(x:y) etc = 0, H(x:y|z) = 1")
x = [0, 0, 0, 0, 1, 1, 1, 1]
y = [0, 1, 0, 1, 0, 1, 0, 1]
z = [0, 1, 0, 1, 1, 0, 1, 0]
print("H(x), H(y), H(z)", ee.entropyd(x), ee.entropyd(y), ee.entropyd(z))
print("H(x:y), etc", ee.midd(x, y), ee.midd(z, y), ee.midd(x, z))
print("H(x:y|z), etc", ee.cmidd(x, y, z), ee.cmidd(z, y, x), ee.cmidd(x, z, y))

## KL DIVERGENCE ESTIMATOR

print("\n\nKL divergence estimator (not symmetric; the two sample sets need not "
      "have the same number of samples)")
print("should be 0 for same distribution")
sample1 = [[2 * random.random()] for i in range(200)]
sample2 = [[2 * random.random()] for i in range(300)]
print('result:', ee.kldiv(sample1, sample2))
print("should be infinite for totally disjoint distributions (but this estimator "
      "has an upper bound like log(dist) between disjoint prob. masses)")
sample2 = [[3 + 2 * random.random()] for i in range(300)]
print('result:', ee.kldiv(sample1, sample2))
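# For reference, ee.entropyd above is the plug-in (maximum-likelihood) entropy
# of the empirical distribution. A minimal standalone version, assuming base-2
# logs to match the H(x) = H(y) = H(z) = 1 expectations printed above:
import math
from collections import Counter


def entropy_plugin(xs, base=2):
    """Plug-in Shannon entropy of a discrete sample (bits by default)."""
    n = len(xs)
    return -sum((c / n) * math.log(c / n, base) for c in Counter(xs).values())


print(entropy_plugin([0, 0, 0, 0, 1, 1, 1, 1]))  # 1.0 bit: balanced binary variable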
A = [0] * len(N)
AA = [0] * len(N)
B = [0] * len(N)
C = [0] * len(N)
D = [0] * len(N)
for i, n in enumerate(N):
    # The lagged slice S[tau:n + tau] pairs S_{n+tau} with S_n/E_n elementwise,
    # matching the quantities named in the plot labels below.
    A[i] = ee.centropyd(S[tau:n + tau], S[:n])     # H(S_{n+tau} | S_n)
    AA[i] = ee.centropyd(S[tau:n + tau], E[:n])    # H(S_{n+tau} | E_n)
    B[i] = ee.midd(S[tau:n + tau], E[:n])          # I(S_{n+tau} ; E_n)
    C[i] = ee.midd(S[:n], E[:n])                   # I(S_n ; E_n)
    D[i] = ee.cmidd(S[tau:n + tau], E[:n], S[:n])  # I(S_{n+tau} ; E_n | S_n)

fig, axs = plt.subplots(figsize=(12, 6), facecolor="white")
plt.plot(N, A, 'b.-', label=r'$H(S_{n+\tau}\ |\ S_n)$')
plt.plot(N, AA, 'y-', label=r'$H(S_{n+\tau}\ |\ E_n)$')
plt.plot(N, B, 'r.-', label=r'$I(S_{n+\tau}\ ;\ E_n)$')
plt.plot(N, C, 'm-', label=r'$I(S_n\ ;\ E_n)$')
plt.plot(N, D, 'g.', label=r'$I(S_{n+\tau}\ ;\ E_n\ |\ S_n)$')
plt.legend(loc=7)
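# The fragment above assumes discrete series S and E, a lag tau, and a list of
# sample sizes N defined earlier in the script. A hypothetical setup that makes
# it runnable: E is i.i.d. uniform binary and S copies E with 10% flip noise
# one step later, so E_n predicts S_{n+tau} beyond what S_n does and
# I(S_{n+tau}; E_n | S_n) should come out clearly positive.
import random

import matplotlib.pyplot as plt
import entropy_estimators as ee  # same NPEET-style module used above

random.seed(0)
tau = 1
T = 5000
E = [random.randint(0, 1) for _ in range(T)]
S = [0] + [e if random.random() < 0.9 else 1 - e for e in E[:-1]]
N = list(range(500, T - tau, 500))  # growing sample sizes for the curves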