Beispiel #1
0
def cmidcutd(x, y, z, slice_num=20):
    """Return ``ee.cmidd`` of ``x`` and a sliced ``y``, conditioned on ``z``.

    Each input is wrapped in a one-row array and transposed before being
    handed to the estimator. Only ``y`` is passed through the module's
    ``slice`` helper first (this is the project's own ``slice``, not the
    builtin; presumably it bins ``y`` into ``slice_num`` intervals -- confirm
    against its definition).

    Args:
        x: First variable, used as-is.
        y: Second variable, passed through ``slice(y, slice_num)``.
        z: Conditioning variable, used as-is.
        slice_num: Second argument forwarded to ``slice``.

    Returns:
        Whatever ``ee.cmidd`` returns for the three column arrays.
    """
    x_col = np.array([x]).T
    y_sliced = slice(y, slice_num)
    y_col = np.array([y_sliced]).T
    z_col = np.array([z]).T
    return ee.cmidd(x_col, y_col, z_col)
Beispiel #2
0
def _to_discrete_column(values, slice_num):
    """Return ``values`` as an (n, 1) array, slicing float64 input first.

    ``slice`` is the module's own helper, not the builtin (presumably a
    discretiser -- confirm against its definition).
    """
    if values.dtype == 'float64':
        values = slice(values, slice_num)
    return np.array([[s] for s in values])


def casual_entropy(i, j, K, data):
    """Permutation-test the conditional mutual information of two columns.

    Computes ``ee.cmidd(data[i], data[j] | data[k] for k in K)`` in base 2
    and compares it with the values obtained after shuffling the ``j``
    column 200 times. The observed value counts as "useful" only when it
    exceeds the upper bound of the central 95% interval of the shuffled
    values.

    Args:
        i: Key of the first variable in ``data``.
        j: Key of the second variable in ``data``; may be any type that
            ``str()`` can render (it is only used for indexing and logging).
        K: Collection of keys of the conditioning variables. When empty the
            call is delegated to ``casual_entropy_empty``.
        data: Mapping-like object whose values expose ``dtype`` and ``len``.

    Returns:
        A 6-tuple ``(useful_result, v, ave, (ci0, ci1), if_large_zero,
        flag)`` where ``v`` is the observed value, ``ave`` the mean of the
        shuffled values, ``(ci0, ci1)`` the interval bounds,
        ``if_large_zero`` is ``v > ci1``, ``useful_result`` is ``v`` or 0
        accordingly, and ``flag`` is 1 or 0 mirroring ``if_large_zero``.
    """
    x = data[i]
    y = data[j]
    if len(K) == 0:
        return casual_entropy_empty(i, j, K, data)

    slice_num = int(np.power(len(x), 0.4) / 2)

    x_vec = _to_discrete_column(x, slice_num)
    y_vec = _to_discrete_column(y, slice_num)
    if y.dtype != "float64":
        # str() keeps the message working for non-string keys as well.
        print("index j " + str(j) + "is discrete")

    # One column per conditioning variable, stacked side by side.
    z_all = [_to_discrete_column(data[k], slice_num) for k in K]
    z_combine = np.c_[tuple(z_all)]

    # Null distribution: shuffle y in place on a private copy.
    y_clone = np.copy(y_vec)
    ns = 200
    ci = 0.95
    outputs = []
    # `_` instead of `i` so the column-key parameter is not overwritten.
    for _ in range(ns):
        np.random.shuffle(y_clone)
        outputs.append(ee.cmidd(x_vec, y_clone, z_combine, base=2))
    outputs.sort()

    v = ee.cmidd(x_vec, y_vec, z_combine, base=2)
    ave = np.mean(outputs)
    ci0 = outputs[int((1. - ci) / 2 * ns)]
    ci1 = outputs[int((1. + ci) / 2 * ns)]

    if_large_zero = bool(v > ci1)
    useful_result = if_large_zero * v

    # Placeholders: the sigma multiple and t-statistic are not computed.
    multi = 0
    statistic = 1

    print("index " + str(j) + " function: casual_entropy, the length of data",
          len(x_vec), "the slice number is", slice_num, "useful value is ",
          useful_result, "multi sigma is ", multi)

    return useful_result, v, ave, (
        ci0, ci1), if_large_zero, abs(statistic) * if_large_zero
Beispiel #3
0
 def cmi(self, X, Y, Z):
     """Return ``ee.cmidd`` of ``X`` and ``Y`` conditioned on ``Z``.

     The NumPy global random generator is re-seeded with 0 on every call,
     and each input is copied in C order before being passed on; ``Z`` is
     supplied through the ``z`` keyword.
     """
     np.random.seed(0)
     x_c = X.copy(order='C')
     y_c = Y.copy(order='C')
     z_c = Z.copy(order='C')
     return ee.cmidd(x_c, y_c, z=z_c)
Beispiel #4
0
    err.append((tempmean - tempent[samplo], tempent[samphi] - tempmean))

# Summary of values computed earlier in the script (Ntry, ent and err are
# defined above this fragment).
print('samples used', Ntry)
print('estimated MI', ent)
print('95% conf int.\n', err)

# DISCRETE ESTIMATORS

print("\n\nTest of the discrete entropy estimators\n")
print("For z = y xor x, w/x, y uniform random binary, we should get H(x)=H(y)=H(z) = 1, H(x:y) etc = 0, H(x:y|z) = 1")
# Eight samples covering each (x, y) combination twice; z is x xor y.
x = [0] * 4 + [1] * 4
y = [0, 1] * 4
z = [0, 1] * 2 + [1, 0] * 2
print("H(x), H(y), H(z)", *(ee.entropyd(var) for var in (x, y, z)))
print("H(x:y), etc",
      *(ee.midd(a, b) for a, b in ((x, y), (z, y), (x, z))))
print("H(x:y|z), etc",
      *(ee.cmidd(a, b, c) for a, b, c in ((x, y, z), (z, y, x), (x, z, y))))


# KL Div estimator
print("\n\nKl divergence estimator (not symmetric, not required to have same num samples in each sample set")
print("should be 0 for same distribution")
# Both sample sets are 1-D points drawn as 2 * random.random().
sample1 = [[2 * random.random()] for _ in range(200)]
sample2 = [[2 * random.random()] for _ in range(300)]
print('result:', ee.kldiv(sample1, sample2))
print("should be infinite for totally disjoint distributions (but this estimator has an upper bound like log(dist) between disjoint prob. masses)")
# Second set shifted by 3 so its range no longer overlaps sample1's.
sample2 = [[3 + 2 * random.random()] for _ in range(300)]
print('result:', ee.kldiv(sample1, sample2))


def test_discrete(size=1000, y_func=lambda x: x**2):
    print("\nTest discrete.")
Beispiel #5
0
# Ntry, ent and err come from the part of the script above this fragment.
print('samples used', Ntry)
print('estimated MI', ent)
print('95% conf int.\n', err)

# DISCRETE ESTIMATORS

print("\n\nTest of the discrete entropy estimators\n")
print("For z = y xor x, w/x, y uniform random binary, we should get H(x)=H(y)=H(z) = 1, H(x:y) etc = 0, H(x:y|z) = 1")
# z is the element-wise xor of x and y.
x = [0, 0, 0, 0, 1, 1, 1, 1]
y = [0, 1, 0, 1, 0, 1, 0, 1]
z = [0, 1, 0, 1, 1, 0, 1, 0]
print("H(x), H(y), H(z)", *map(ee.entropyd, (x, y, z)))
print("H(x:y), etc", *[ee.midd(*pair) for pair in [(x, y), (z, y), (x, z)]])
print("H(x:y|z), etc",
      *[ee.cmidd(*triple) for triple in [(x, y, z), (z, y, x), (x, z, y)]])

# KL Div estimator
print("\n\nKl divergence estimator (not symmetric, not required to have same num samples in each sample set")
print("should be 0 for same distribution")
# Two independently drawn sample sets of different sizes.
sample1 = [[random.random() * 2] for _ in range(200)]
sample2 = [[random.random() * 2] for _ in range(300)]
print('result:', ee.kldiv(sample1, sample2))
print("should be infinite for totally disjoint distributions (but this estimator has an upper bound like log(dist) between disjoint prob. masses)")
# Redraw the second set with an offset of 3.
sample2 = [[2 * random.random() + 3] for _ in range(300)]
print('result:', ee.kldiv(sample1, sample2))
Beispiel #6
0
def first_plot():
    """Plot several mutual-information estimates over windows of the data.

    Loads the records from ``./BA_network_all.xlsx`` through ``EdgeList``,
    merges the real and random ``degree`` and ``distance`` records, and walks
    over consecutive windows of ``len(if_rand) // 10`` samples. The window
    end points come from ``np.arange`` with an exclusive stop, so a window
    ending exactly at ``len(if_rand)`` is not visited. For every window the
    following are collected:

    * ``ent1``: ``ep.midcut(if_rand, degree)`` with a cube-root slice count
    * ``ent2``: ``ep.midc(if_rand, degree, k=20)``
    * ``ent6`` / ``ent7``: the two values returned by ``ep.cmiddc``
    * ``ent3``: ``ep.cmiddcut(if_rand, distances, degree)``
    * ``ent4``: the same with ``if_rand`` shuffled
    * ``ent5``: the same with ``distances`` shuffled

    Afterwards a few whole-data statistics are printed and ``ent1``-``ent5``
    and ``ent7`` are plotted with matplotlib.

    Returns:
        None. Output goes to stdout and to a matplotlib window.
    """
    el = EdgeList()
    file_name = './BA_network_all.xlsx'
    el.load_records(file_name)
    degree, if_rand = merge(el.records["degree"], el.records_random["degree"])
    distances, if_rand = merge(el.records["distance"],
                               el.records_random["distance"])
    print(if_rand.dtype == 'int64')
    print(degree.dtype == 'float64')

    step_num = len(if_rand)
    step_size = step_num // 10
    edge_nums = np.arange(step_size, len(if_rand), step_size)
    ent1 = []
    ent2 = []
    ent3 = []
    ent4 = []
    ent5 = []
    ent6 = []
    ent7 = []
    # Never filled; kept because its length is printed below.
    ent0 = []

    for i in edge_nums:
        start = i - step_size
        if_rand_cut = if_rand[start:i]
        degree_cut = degree[start:i]
        distances_cut = distances[start:i]

        slice_num = int(np.power(len(degree_cut), 1.0 / 3))
        ent1.append(ep.midcut(if_rand_cut, degree_cut, slice_num=slice_num))
        ent2.append(ep.midc(if_rand_cut, degree_cut, k=20))

        a, b = ep.cmiddc(if_rand_cut, distances_cut, degree_cut, k=20)
        ent6.append(a)
        ent7.append(b)

        ent3.append(ep.cmiddcut(if_rand_cut, distances_cut, degree_cut))

        # Shuffled baselines. Always shuffle a copy: the window is a slice of
        # the full array, and shuffling it in place would scramble the data
        # that the whole-data statistics below still need.
        if_rand_shuffled = if_rand_cut.copy()
        random.shuffle(if_rand_shuffled)
        ent4.append(ep.cmiddcut(if_rand_shuffled, distances_cut, degree_cut))

        distances_shuffled = distances_cut.copy()
        random.shuffle(distances_shuffled)
        ent5.append(ep.cmiddcut(if_rand_cut, distances_shuffled, degree_cut))

    print('len')
    print(len(ent6))
    print(len(ent0))

    # Whole-data statistics; degree is sliced with a square-root slice count.
    slice_num = int(np.power(len(degree), 1.0 / 2))

    print(ep.midd(if_rand, distances))
    x_vec = np.transpose(np.array([if_rand]))
    y_vec = np.transpose(np.array([distances]))
    z_vec = np.transpose(np.array([ep.slice(degree, slice_num)]))
    print(ee.shuffle_test(ee.midd, x_vec, y_vec, base=2))
    print(ee.shuffle_test(ee.midd, x_vec, z_vec, base=2))
    print(ee.cmidd(x_vec, z_vec, y_vec, base=2))

    plt.plot(ent7, label=r"7")
    plt.plot(ent1, 'vb-', label=r'1')
    plt.plot(ent2, 'or--', label=r'2')
    plt.plot(ent3, label=r'3')
    plt.plot(ent4, label=r'4')
    plt.plot(ent5, label=r'5')
    plt.legend()
    plt.show()