def weightinginfo(feature1, feature2, im1, im2):
    # im1 and im2 are accepted but not used here.
    # Earlier approach (disabled): mutual information via pyentropy after
    # quantising the features into 10 discrete bins.
    # weight = ee.mi(feature1, feature2)
    '''
    X = np.asarray(feature1)
    Y = np.asarray(feature2)
    Xn, Xm, Yn, Ym = 0, 10, 0, 10
    X = utils.quantise(X, 10, uniform='sampling', minmax=None, centers=False)
    print(X)
    Y = utils.quantise(Y, 10, uniform='sampling', minmax=None, centers=False)
    sys = systems.DiscreteSystem(X, (Xn, Xm), Y, (Yn, Ym))
    sys.calculate_entropies(method='qe', calc=['HX', 'HXY', 'HiXY', 'HshXY'])
    weight = sys.I()
    '''
    # Current approach: k-NN KL-divergence estimate between the two feature
    # samples, reported in bits.
    feature1 = feature1.tolist()
    feature2 = feature2.tolist()
    weight = ee.kldiv(feature1, feature2, k=3, base=2)
    print('weight is', weight)
    return weight
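# A minimal, self-contained sketch of the ee.kldiv call pattern used above,
# assuming ee is NPEET's entropy_estimators module (the "npeet" package name
# is an assumption). Each sample set is a list of d-dimensional points, and
# base=2 reports the k-NN estimate of D(P || Q) in bits.
import random
from npeet import entropy_estimators as ee

p = [[random.gauss(0.0, 1.0)] for _ in range(500)]  # samples from P = N(0, 1)
q = [[random.gauss(0.5, 1.0)] for _ in range(500)]  # samples from Q = N(0.5, 1)
# True divergence is (0.5 ** 2) / 2 = 0.125 nats, i.e. about 0.18 bits.
print(ee.kldiv(p, q, k=3, base=2))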
ax[0].set_xlabel('Percent White')
plt.tight_layout()
fig.savefig('figures/MN_GA_density.pdf')
plt.close('all')

sns.set_style('white')
fig, ax = plt.subplots(1, 2, figsize=(6, 2))
# k-NN KL divergence between a reference sample (normal approximation with
# std = sqrt(p * (1 - p) / 100)) and each state's observed values.
mn = EE.kldiv(EE.vectorize(np.random.normal(.82, np.sqrt(.82 * (1 - .82) / 100), 1000)),
              EE.vectorize(MN_values), k=20)
ga = EE.kldiv(EE.vectorize(np.random.normal(.55, np.sqrt(.55 * (1 - .55) / 100), 1000)),
              EE.vectorize(GA_values), k=20)
objects = [mn, ga]
y_pos = np.arange(len(objects))
ax[0].bar(y_pos * 2 + 1, objects, align='center',
          color=['darkturquoise', 'magenta'])
ax[0].set_xticks(y_pos * 2 + 1)
ax[0].set_xticklabels(['Minnesota', 'Georgia'])
ax[0].set_xlim([0, 4])
ax[1].bar(y_pos * 2 + 1, [9.2, 13.3], align='center',
          color=['darkturquoise', 'magenta'])
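# For context: EE.vectorize above appears to be NPEET's helper that wraps a
# flat sequence of scalars into the list-of-1-D-points layout kldiv expects,
# i.e. [x0, x1, ...] -> [[x0], [x1], ...]. A minimal equivalent sketch:
def vectorize(scalarlist):
    return [[x] for x in scalarlist]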
after_c = after["raw core"].values

# Assemble per-point coordinate vectors for the k-NN KL-divergence estimate.
before_xyz = []
before_xyzc = []
for j in range(len(before_x)):
    before_xyz.append([before_x[j], before_y[j], before_z[j]])
    before_xyzc.append([before_x[j], before_y[j], before_z[j], before_c[j]])
after_xyz = []
after_xyzc = []
for j in range(len(after_x)):
    after_xyz.append([after_x[j], after_y[j], after_z[j]])
    after_xyzc.append([after_x[j], after_y[j], after_z[j], after_c[j]])

kl_xyz, pw_xyz = ee.kldiv(before_xyz, after_xyz, return_pw=True)

# Colour each "before" point by its pointwise contribution to the estimate.
plt.clf()
ax = fig.add_subplot(111, projection='3d')
cs = ax.scatter(before_x, before_y, before_z, c=pw_xyz, cmap=cm.jet)
plt.title("StudyID:" + meta_data["Study"][i] +
          "\nType:" + meta_data['Type'][i] +
          "\nKL divergence= " + ("%.3f" % kl_xyz))
plt.colorbar(cs)
plt.savefig("figs/" + meta_data['Type'][i] + "_" + meta_data["Study"][i] + ".jpg",
            bbox_inches='tight')
f_out = open("pointwise_kl/" + meta_data['Type'][i] + "_" +
             meta_data["Study"][i] + ".csv", 'w')
f_out.write("X,Y,Z,pointwise_kl_divergence\n")
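# return_pw is not a keyword in stock NPEET (its kldiv returns a scalar), so
# the call above presumably relies on a local extension. Because the
# Wang-Kulkarni-Verdu k-NN estimator is a mean of per-sample terms, pointwise
# contributions can be recovered directly; kldiv_pointwise below is a
# hypothetical sketch of that idea, not the fork's actual implementation.
import numpy as np
from scipy.spatial import cKDTree

def kldiv_pointwise(x, xp, k=3, base=2):
    # Per-sample term: d * (log nu_k - log rho_k) + log(m / (n - 1)), where
    # rho_k is the distance to the k-th neighbour within x and nu_k the
    # distance to the k-th neighbour in xp (assumes k >= 2, since for k == 1
    # cKDTree.query returns 1-D arrays).
    x, xp = np.asarray(x, dtype=float), np.asarray(xp, dtype=float)
    n, d = x.shape
    m = len(xp)
    tree, treep = cKDTree(x), cKDTree(xp)
    # Query k+1 neighbours within x because each point is its own nearest.
    rho = tree.query(x, k + 1, p=np.inf)[0][:, k]
    nu = treep.query(x, k, p=np.inf)[0][:, k - 1]
    pw = (d * (np.log(nu) - np.log(rho)) + np.log(m / (n - 1))) / np.log(base)
    return pw.mean(), pw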
tempent.sort()
tempmean = np.mean(tempent)
ent.append(tempmean)
err.append((tempmean - tempent[samplo], tempent[samphi] - tempmean))
print('samples used', Ntry)
print('estimated MI', ent)
print('95% conf int.\n', err)

# DISCRETE ESTIMATORS
print("\n\nTest of the discrete entropy estimators\n")
print("For z = y xor x, w/ x, y uniform random binary, we should get "
      "H(x) = H(y) = H(z) = 1, H(x:y) etc = 0, H(x:y|z) = 1")
x = [0, 0, 0, 0, 1, 1, 1, 1]
y = [0, 1, 0, 1, 0, 1, 0, 1]
z = [0, 1, 0, 1, 1, 0, 1, 0]
print("H(x), H(y), H(z)", ee.entropyd(x), ee.entropyd(y), ee.entropyd(z))
print("H(x:y), etc", ee.midd(x, y), ee.midd(z, y), ee.midd(x, z))
print("H(x:y|z), etc", ee.cmidd(x, y, z), ee.cmidd(z, y, x), ee.cmidd(x, z, y))

# KL DIVERGENCE ESTIMATOR
print("\n\nKL divergence estimator (not symmetric; the two sample sets "
      "need not contain the same number of samples)")
print("should be 0 for same distribution")
sample1 = [[2 * random.random()] for i in range(200)]
sample2 = [[2 * random.random()] for i in range(300)]
print('result:', ee.kldiv(sample1, sample2))
print("should be infinite for totally disjoint distributions (but this "
      "estimator has an upper bound like log(dist) between disjoint prob. masses)")
sample2 = [[3 + 2 * random.random()] for i in range(300)]
print('result:', ee.kldiv(sample1, sample2))
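# A quick check of the "upper bound like log(dist)" remark above: when the
# supports are separated by a gap, each query point's k-th neighbour in the
# other sample sits just across the gap, so the estimate grows roughly like
# d * log(gap / typical within-sample spacing) instead of diverging.
# (Illustrative addition; reuses sample1, random, and ee from above.)
for gap in [3, 30, 300]:
    far = [[gap + 2 * random.random()] for i in range(300)]
    print('gap', gap, '->', ee.kldiv(sample1, far))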
data_inter, data_intra, data_multi = [], [], []
with open('../data/edges-by-type/{}_inter.dat'.format(prefix)) as ofile:
    for line in ofile:
        val = float(line)
        if val != 0 and not math.isnan(val):
            data_inter.append([val])
with open('../data/edges-by-type/{}_intra.dat'.format(prefix)) as ofile:
    for line in ofile:
        val = float(line)
        if val != 0 and not math.isnan(val):
            data_intra.append([val])
with open('../data/edges-by-type/{}_multi.dat'.format(prefix)) as ofile:
    for line in ofile:
        val = float(line)
        if val != 0 and not math.isnan(val):
            data_multi.append([val])

K = 5
KLxy = ee.kldiv(data_inter[:5000], data_intra[:5000], k=K)
KLyx = ee.kldiv(data_intra[:5000], data_inter[:5000], k=K)
# Note: this is the symmetrised KL (Jeffreys) divergence, not the
# Jensen-Shannon divergence, despite the variable name.
JS = 0.5 * (KLxy + KLyx)
print(label_type, weight_type, JS)
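# For the actual Jensen-Shannon divergence with the same k-NN estimator, each
# sample set can be compared against a held-out pool standing in for the
# mixture M = (P + Q) / 2. js_div below is an illustrative helper, not part
# of NPEET; holding the evaluation points out of the pool avoids zero k-NN
# distances (and hence log(0)).
def js_div(p_samples, q_samples, k=5):
    n = min(len(p_samples), len(q_samples)) // 2
    mix = p_samples[n:2 * n] + q_samples[n:2 * n]  # approximate mixture sample
    return 0.5 * (ee.kldiv(p_samples[:n], mix, k=k) +
                  ee.kldiv(q_samples[:n], mix, k=k))

print(label_type, weight_type, js_div(data_inter[:5000], data_intra[:5000], k=K))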