Example 1
def weightinginfo(feature1,feature2,im1,im2):
    #weight=ee.mi(feature1,feature2)
    '''X=feature1
    Y=feature2
    Xn=0
    Xm=10
    Yn=0
    Ym=10
    X=np.asarray(X)
    Y=np.asarray(Y)
    X=utils.quantise(X, 10, uniform='sampling', minmax=None, centers=False)
    print X
    Y=utils.quantise(Y, 10, uniform='sampling', minmax=None, centers=False)
    sys = systems.DiscreteSystem(X,(Xn,Xm),Y,(Yn,Ym))
    sys.calculate_entropies(method='qe',calc=['HX','HXY','HiXY','HshXY'])
    weight=sys.I()'''
    
    # flatten the feature arrays into plain Python lists of sample vectors
    feature1 = feature1.tolist()
    feature2 = feature2.tolist()
    # k-nearest-neighbour KL divergence estimate (k=3), reported in bits (base=2)
    weight = ee.kldiv(feature1, feature2, k=3, base=2)
    print('weight is', weight)
    return weight
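
For reference, a minimal sketch of how this helper might be exercised, assuming ee is NPEET's entropy_estimators module and the features are 2-D NumPy arrays of sample vectors (the im1/im2 arguments are unused in the snippet above; the names below are hypothetical):

import numpy as np
from npeet import entropy_estimators as ee  # assumed import path for NPEET

# hypothetical feature sets: 200 two-dimensional samples each
feature_a = np.random.rand(200, 2)
feature_b = np.random.rand(200, 2) + 0.5

w = weightinginfo(feature_a, feature_b, None, None)  # im1/im2 passed as placeholders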
Example 2
print('samples used', Ntry)
print('estimated MI', ent)
print('95% conf int.\n', err)

# DISCRETE ESTIMATORS

print("\n\nTest of the discrete entropy estimators\n")
print(
    "For z = y xor x, w/x, y uniform random binary, we should get H(x)=H(y)=H(z) = 1, H(x:y) etc = 0, H(x:y|z) = 1"
)
x = [0, 0, 0, 0, 1, 1, 1, 1]
y = [0, 1, 0, 1, 0, 1, 0, 1]
z = [0, 1, 0, 1, 1, 0, 1, 0]
print("H(x), H(y), H(z)", ee.entropyd(x), ee.entropyd(y), ee.entropyd(z))
print("H(x:y), etc", ee.midd(x, y), ee.midd(z, y), ee.midd(x, z))
print("H(x:y|z), etc", ee.cmidd(x, y, z), ee.cmidd(z, y, x), ee.cmidd(x, z, y))

# KL Div estimator
print(
    "\n\nKL divergence estimator (not symmetric, not required to have the same number of samples in each sample set)"
)
print("should be 0 for same distribution")
sample1 = [[2 * random.random()] for i in range(200)]
sample2 = [[2 * random.random()] for i in range(300)]
print('result:', ee.kldiv(sample1, sample2))
print(
    "should be infinite for totally disjoint distributions (but this estimator has an upper bound like log(dist) between disjoint prob. masses)"
)
sample2 = [[3 + 2 * random.random()] for i in range(300)]
print('result:', ee.kldiv(sample1, sample2))
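
Since the estimator is not symmetric, a quick sketch of checking both directions (reusing sample1 and sample2 from the disjoint case above) could look like this; the two results generally differ:

# sketch: the two directions of the k-NN KL estimate are generally unequal
print('forward KL estimate:', ee.kldiv(sample1, sample2))
print('reverse KL estimate:', ee.kldiv(sample2, sample1))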
Example 3
    ax[0].set_xlabel('Percent White')


    plt.tight_layout()

    fig.savefig('figures/MN_GA_density.pdf')

    plt.close('all')

    sns.set_style('white')
    fig, ax = plt.subplots(1, 2, figsize=(6, 2))
    # bins=np.arange(-.1,1.1,.05)



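    # KL divergence (k=20 neighbours) between samples drawn from a normal approximation
    # (mean .82 / .55, std = sqrt(p*(1-p)/100)) and the observed MN/GA values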
    mn = EE.kldiv(EE.vectorize(np.random.normal(.82, np.sqrt(.82 * (1 - .82) / 100), 1000)), EE.vectorize(MN_values),
                  k=20)
    ga = EE.kldiv(EE.vectorize(np.random.normal(.55, np.sqrt(.55 * (1 - .55) / 100), 1000)), EE.vectorize(GA_values),
                  k=20)
    objects = [mn, ga]
    y_pos = np.arange(len(objects))

    ax[0].bar(y_pos * 2 + 1, objects, align='center', color=['darkturquoise', 'magenta'])
    # plt.xticks(y_pos, objects)
    # plt.ylabel('Usage')
    # plt.title('Programming language usage')
    ax[0].set_xticks(y_pos * 2 + 1)
    ax[0].set_xticklabels(['Minnesota', 'Georgia'])
    ax[0].set_xlim([0, 4])

    ax[1].bar(y_pos * 2 + 1, [9.2, 13.3], align='center', color=['darkturquoise', 'magenta'])
    # plt.xticks(y_pos, objects)
    after_c = after["raw core"].values

    before_xyz = []
    before_xyzc = []
    for j in range(len(before_x)):
        before_xyz.append([before_x[j], before_y[j], before_z[j]])
        before_xyzc.append(
            [before_x[j], before_y[j], before_z[j], before_c[j]])

    after_xyz = []
    after_xyzc = []
    for j in range(len(after_x)):
        after_xyz.append([after_x[j], after_y[j], after_z[j]])
        after_xyzc.append([after_x[j], after_y[j], after_z[j], after_c[j]])

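    # KL divergence between the before/after point clouds; pw_xyz holds the
    # per-point contributions used to colour the 3-D scatter below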
    kl_xyz, pw_xyz = ee.kldiv(before_xyz, after_xyz, return_pw=True)

    plt.clf()
    ax = fig.add_subplot(111, projection='3d')
    cs = ax.scatter(before_x, before_y, before_z, c=pw_xyz, cmap=cm.jet)
    plt.title("StudyID:" + meta_data["Study"][i] + "\nType:" +
              meta_data['Type'][i] + "\nKL divergence= " + ("%.3f" % kl_xyz))
    plt.colorbar(cs)
    plt.savefig("figs/" + meta_data['Type'][i] + "_" + meta_data["Study"][i] +
                ".jpg",
                bbox_inches='tight')

    f_out = open(
        "pointwise_kl/" + meta_data['Type'][i] + "_" + meta_data["Study"][i] +
        ".csv", 'w')
    f_out.write("X,Y,Z,pointwise_kl_divergence\n")
Example 5
  tempent.sort()
  tempmean = np.mean(tempent)
  ent.append(tempmean)
  err.append((tempmean - tempent[samplo],tempent[samphi]-tempmean)) 

print('samples used', Ntry)
print('estimated MI', ent)
print('95% conf int.\n', err)

## DISCRETE ESTIMATORS

print "\n\nTest of the discrete entropy estimators\n"
print "For z = y xor x, w/x,y uniform random binary, we should get H(x)=H(y)=H(z) = 1, H(x:y) etc = 0, H(x:y|z) = 1"
x = [0,0,0,0,1,1,1,1]
y = [0,1,0,1,0,1,0,1]
z = [0,1,0,1,1,0,1,0]
print "H(x),H(y),H(z)",ee.entropyd(x),ee.entropyd(y),ee.entropyd(z)
print "H(x:y),etc",ee.midd(x,y),ee.midd(z,y),ee.midd(x,z)
print "H(x:y|z),etc",ee.cmidd(x,y,z),ee.cmidd(z,y,x),ee.cmidd(x,z,y)


## KL Div estimator
print "\n\nKl divergence estimator (not symmetric, not required to have same num samples in each sample set"
print "should be 0 for same distribution"
sample1 = [[2*random.random()] for i in range(200)]
sample2 = [[2*random.random()] for i in range(300)]
print('result:', ee.kldiv(sample1, sample2))
print("should be infinite for totally disjoint distributions (but this estimator has an upper bound like log(dist) between disjoint prob. masses)")
sample2 = [[3+2*random.random()] for i in range(300)]
print('result:', ee.kldiv(sample1, sample2))
Example 6
					pass
				else:
					data_inter.append([val])

		with open('../data/edges-by-type/{}_intra.dat'.format(prefix)) as ofile:
			for line in ofile:
				val = float(line)

				if val == 0 or math.isnan(val):
					pass
				else:
					data_intra.append([val])

		with open('../data/edges-by-type/{}_multi.dat'.format(prefix)) as ofile:
			for line in ofile:
				val = float(line)

				if val == 0 or math.isnan(val):
					pass
				else:
					data_multi.append([val])

		K = 5

		KLxy = ee.kldiv(data_inter[:5000], data_intra[:5000], k = K)

		KLyx = ee.kldiv(data_intra[:5000], data_inter[:5000], k = K)

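		# symmetrise by averaging the two directions of the KL estimate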
		JS = 0.5*(KLxy + KLyx)

		print(label_type, weight_type, JS)