def test_new_interaction_information(filepath_cache, dataset, train_y, binsize=4):
    import numpy as np

    from information_discrete import mi

    # load_dataset, mi_3d and mi_4d are helpers defined elsewhere in this project
    dataset = load_dataset(filepath_cache, dataset, binsize)

    # ind_var34;saldo_medio_var5_ult3;target 0.0149167532518
    # a = pairwise MI between the two binned features
    a = mi(dataset["ind_var34"].values, dataset["saldo_medio_var5_ult3"].values)
    # b = three-way quantity from the project's mi_3d helper
    b = mi_3d(("ind_var34", dataset["ind_var34"].values), ("saldo_medio_var5_ult3", dataset["saldo_medio_var5_ult3"].values), ("target", train_y.values))

    # unique bin values of each variable (not used further in this snippet)
    distribution = {}
    x = np.unique(dataset["ind_var34"].values)
    y = np.unique(dataset["saldo_medio_var5_ult3"].values)
    z = np.unique(train_y.values)

    # c = I(X;Y|Z); c - a is the interaction information of the two features with the target
    from entropy_estimators import cmidd
    c = cmidd(dataset["ind_var34"].values, dataset["saldo_medio_var5_ult3"].values, train_y.values)

    print(c, a, b, c - a)

    print(mi_4d(("var3", dataset["var3"].values), ("ind_var34", dataset["ind_var34"].values), ("saldo_medio_var5_ult3", dataset["saldo_medio_var5_ult3"].values), ("target", train_y.values)))
Example #2
print('samples used', Ntry)
print('estimated MI', ent)
print('95% conf int.\n', err)

# DISCRETE ESTIMATORS

print("\n\nTest of the discrete entropy estimators\n")
print(
    "For z = y xor x, with x, y uniform random binary, we should get H(x)=H(y)=H(z) = 1, H(x:y) etc = 0, H(x:y|z) = 1"
)
x = [0, 0, 0, 0, 1, 1, 1, 1]
y = [0, 1, 0, 1, 0, 1, 0, 1]
z = [0, 1, 0, 1, 1, 0, 1, 0]
print("H(x), H(y), H(z)", ee.entropyd(x), ee.entropyd(y), ee.entropyd(z))
print("H(x:y), etc", ee.midd(x, y), ee.midd(z, y), ee.midd(x, z))
print("H(x:y|z), etc", ee.cmidd(x, y, z), ee.cmidd(z, y, x), ee.cmidd(x, z, y))

# KL Div estimator
print(
    "\n\nKL divergence estimator (not symmetric; the two sample sets need not have the same number of samples)"
)
print("should be 0 for same distribution")
sample1 = [[2 * random.random()] for i in range(200)]
sample2 = [[2 * random.random()] for i in range(300)]
print('result:', ee.kldiv(sample1, sample2))
print(
    "should be infinite for totally disjoint distributions (but this estimator has an upper bound of roughly log(dist) between the disjoint probability masses)"
)
sample2 = [[3 + 2 * random.random()] for i in range(300)]
print('result:', ee.kldiv(sample1, sample2))
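As a quick analytic cross-check of the expectations stated above: for the XOR triple, each variable is a fair bit (H = 1), the pair (x, y) takes four equally likely values (H = 2), so I(x;y) = 1 + 1 - 2 = 0. A minimal plug-in sketch with numpy only (ee.entropyd may apply a different bias treatment, so small numerical differences are expected):

# Plug-in (count-based) entropy check of the XOR example; numpy only.
import numpy as np

def plugin_entropy(values, base=2):
    # Shannon entropy of the empirical distribution of the rows of `values`.
    _, counts = np.unique(np.asarray(values), axis=0, return_counts=True)
    p = counts / counts.sum()
    return float(-(p * np.log(p)).sum() / np.log(base))

x = [0, 0, 0, 0, 1, 1, 1, 1]
y = [0, 1, 0, 1, 0, 1, 0, 1]
z = [0, 1, 0, 1, 1, 0, 1, 0]

print(plugin_entropy(x), plugin_entropy(y), plugin_entropy(z))  # 1.0 1.0 1.0
# I(x;y) = H(x) + H(y) - H(x,y) = 1 + 1 - 2 = 0 for this balanced design
print(plugin_entropy(x) + plugin_entropy(y) - plugin_entropy(list(zip(x, y))))  # 0.0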
Example #3
  tempent.sort()
  tempmean = np.mean(tempent)
  ent.append(tempmean)
  err.append((tempmean - tempent[samplo],tempent[samphi]-tempmean)) 

print 'samples used',Ntry
print 'estimated MI',ent
print '95% conf int.\n',err

## DISCRETE ESTIMATORS

print "\n\nTest of the discrete entropy estimators\n"
print "For z = y xor x, w/x,y uniform random binary, we should get H(x)=H(y)=H(z) = 1, H(x:y) etc = 0, H(x:y|z) = 1"
x = [0,0,0,0,1,1,1,1]
y = [0,1,0,1,0,1,0,1]
z = [0,1,0,1,1,0,1,0]
print "H(x),H(y),H(z)",ee.entropyd(x),ee.entropyd(y),ee.entropyd(z)
print "H(x:y),etc",ee.midd(x,y),ee.midd(z,y),ee.midd(x,z)
print "H(x:y|z),etc",ee.cmidd(x,y,z),ee.cmidd(z,y,x),ee.cmidd(x,z,y)


## KL Div estimator
print "\n\nKl divergence estimator (not symmetric, not required to have same num samples in each sample set"
print "should be 0 for same distribution"
sample1 = [[2*random.random()] for i in range(200)]
sample2 = [[2*random.random()] for i in range(300)]
print 'result:',ee.kldiv(sample1,sample2)
print "should be infinite for totally disjoint distributions (but this estimator has an upper bound like log(dist) between disjoint prob. masses)"
sample2 = [[3+2*random.random()] for i in range(300)]
print 'result:',ee.kldiv(sample1,sample2)
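A closed-form target is also handy for the KL estimator: for two unit-variance Gaussians with means 0 and 1, KL(p || q) = 0.5 nats, roughly 0.72 bits. A rough sanity check, assuming your copy of entropy_estimators reports the divergence in bits by default (sample sizes and seed are illustrative):

# Hedged sanity check of ee.kldiv against a known closed-form value.
import numpy as np
import entropy_estimators as ee

rng = np.random.default_rng(0)
sample_p = [[v] for v in rng.normal(0.0, 1.0, 2000)]  # x  ~ N(0, 1)
sample_q = [[v] for v in rng.normal(1.0, 1.0, 3000)]  # x' ~ N(1, 1)

# True KL( N(0,1) || N(1,1) ) = 0.5 nats ~= 0.72 bits.
print('estimated KL:', ee.kldiv(sample_p, sample_q))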
Example #4
A = [0] * len(N)
AA = [0] * len(N)
B = [0] * len(N)
C = [0] * len(N)
D = [0] * len(N)

for i, n in enumerate(N):
    # pair each sample at time t+tau with the samples at time t
    # (NPEET's discrete estimators require equal-length inputs)
    A[i] = ee.centropyd(S[tau:n+tau], S[:n])     # H( S_{n+tau} | S_n )
    AA[i] = ee.centropyd(S[tau:n+tau], E[:n])    # H( S_{n+tau} | E_n )

    B[i] = ee.midd(S[tau:n+tau], E[:n])          # I( S_{n+tau} ; E_n )
    C[i] = ee.midd(S[:n], E[:n])                 # I( S_n ; E_n )

    D[i] = ee.cmidd(S[tau:n+tau], E[:n], S[:n])  # I( S_{n+tau} ; E_n | S_n )


fig, axs = plt.subplots(figsize=(12, 6), facecolor="white")

plt.plot(N, A, 'b.-',label=r'$H(S_{n+\tau}\ |\ S_n)$')
plt.plot(N, AA, 'y-',label=r'$H(S_{n+\tau}\ |\ E_n)$')



plt.plot(N, B, 'r.-', label=r'$I(S_{n+\tau}\ ;\ E_n)$')
plt.plot(N, C, 'm-', label=r'$I( S_n ; E_n )$')

plt.plot(N, D, 'g.', label=r'$I(S_{n+\tau};\ E_n\ |\ S_n )$')

plt.legend(loc=7)
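Example #4 assumes two discrete time series S and E, a lag tau, and a list of sample sizes N defined earlier in the source script. A minimal, illustrative setup that makes the loop above runnable (the coupling, flip rate, seed and sizes are assumptions, not the original data):

# Illustrative setup only: in the source project S, E, N and tau are defined upstream.
import numpy as np

rng = np.random.default_rng(0)
tau = 1
N = [200, 500, 1000, 2000]

# E is i.i.d. binary noise; S copies E with a one-step delay and occasional flips,
# so I(S_{n+tau}; E_n | S_n) should stay clearly above zero.
E = list(rng.integers(0, 2, max(N) + tau))
S = [E[0]] + [e if rng.random() > 0.1 else 1 - e for e in E[:-1]]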