Esempio n. 1
0
def compare_H1_H2_structural_attack(bG, aG, bins):
    
    cand_size, bin_size, sig_list, bucket_list = equivalence_class_H1(bG, bins)
    print "H1-bG", bin_size
    cand_size, bin_size, sig_list, bucket_list = equivalence_class_H1(aG, bins)
    print "H1-aG", bin_size
    
    h2_list = equivalence_class_H2_open(bG, None)       # open world
    cand_size, bin_size, sig_list, bucket_list = bucket_H2(h2_list, bins)
    print "H2-open-bG", bin_size
    h2_list = equivalence_class_H2_open(aG, None)
    cand_size, bin_size, sig_list, bucket_list = bucket_H2(h2_list, bins)
    print "H2-open-aG", bin_size
Esempio n. 2
0
def compare_H1_H2_structural_attack(bG, aG, bins):
    
    cand_size, bin_size, sig_list, bucket_list = equivalence_class_H1(bG, bins)
    print "H1-bG", bin_size
    cand_size, bin_size, sig_list, bucket_list = equivalence_class_H1(aG, bins)
    print "H1-aG", bin_size
    
    h2_list = equivalence_class_H2_open(bG, None)       # open world
    cand_size, bin_size, sig_list, bucket_list = bucket_H2(h2_list, bins)
    print "H2-open-bG", bin_size
    h2_list = equivalence_class_H2_open(aG, None)
    cand_size, bin_size, sig_list, bucket_list = bucket_H2(h2_list, bins)
    print "H2-open-aG", bin_size
def incorrectness_uncertain_from_file(before_file, after_file, sample_file, n_samples, bins): 
    
    # compute sig_list_b, bucket_list_b ONCE !
    start = time.clock()
    bG = nx.read_edgelist(before_file, '#', '\t', None, nodetype=int)
#    G = nx.read_edgelist(after_file, '#', '\t', None, nodetype=int, data=True)
    print "read bG: DONE, elapsed :", time.clock() - start
    
    h2_list = equivalence_class_H2_open(bG, None)
    cand_size, bin_size, sig_list_b, bucket_list_b = bucket_H2(h2_list, bins)
#    print "len B:", len(sig_list_b), len(bucket_list_b)
    
    # H1 score, H2 score
    start = time.clock()
    score_H1 = 0.0
    score_H2 = 0.0
    count = 0
    for i in range(n_samples):
        file_name = sample_file + str(i)
        aG = nx.read_edgelist(file_name, '#', '\t', create_using=nx.MultiGraph(), nodetype=int, data=False)     # IMPORTANT: MultiGraph
        # H1
        sum_re_prob, re_prob_dict = incorrectness_H1(bG, aG, bins)
        score_H1 += sum_re_prob
        # H2
        sum_re_prob, re_prob_dict = incorrectness_H2_open(aG, sig_list_b, bucket_list_b, bins)
        score_H2 += sum_re_prob
        print "count =", count
        count += 1
    #
    score_H1 = score_H1/n_samples
    score_H2 = score_H2/n_samples
    print "compute score_H1, score_H2: DONE, elapsed :", time.clock() - start
    
    # 
    return score_H1, score_H2
Esempio n. 4
0
def incorrectness_H2_open(aG, sig_list_b, bucket_list_b, bins):
    """Compute per-node re-identification probabilities against ``aG``.

    The H2 classes of ``aG`` are obtained from ``equivalence_class_H2_open``
    and bucketed with ``bucket_H2``.  Each signature in ``sig_list_b`` is
    then looked up among the signatures of ``aG``.  When an equal signature
    is found, every node ``u`` in the matching entry of ``bucket_list_b``
    gets ``1.0 / len(bucket)`` if ``u`` is also in the corresponding bucket
    of ``aG`` and ``0.0`` otherwise.  A signature with no match adds no
    entries.

    The lookup is a binary search driven by ``list_comparator``; it assumes
    the signature list returned by ``bucket_H2`` is ordered consistently
    with that comparator and that the comparator reports equality
    symmetrically (neither is checked here).

    Args:
        aG: graph to score.
        sig_list_b: reference signatures to look up.
        bucket_list_b: node buckets, indexed in parallel with ``sig_list_b``.
        bins: forwarded unchanged to ``bucket_H2``.

    Returns:
        ``(sum_re_prob, re_prob_dict)`` where ``re_prob_dict[u]`` is the
        probability assigned to node ``u`` and ``sum_re_prob`` is the sum of
        all values in that dict.
    """
    h2_list = equivalence_class_H2_open(aG, None)
    cand_size, bin_size, sig_list_a, bucket_list_a = bucket_H2(h2_list, bins)

    re_prob_dict = {}  # re_prob_dict[u] = reidentification probability of u
    for id_b, sig_b in enumerate(sig_list_b):
        lo = 0
        hi = len(sig_list_a) - 1
        # Checking lo <= hi before indexing means an empty sig_list_a is
        # simply "not found" instead of an IndexError.
        while lo <= hi:
            mid = (lo + hi) // 2  # floor division: valid index on Python 2 and 3
            # One comparator call per probe decides all three outcomes.
            order = list_comparator(sig_b, sig_list_a[mid])
            if order == 0:
                set_a = set(bucket_list_a[mid])
                for u in bucket_list_b[id_b]:
                    # The division only runs when u is in set_a, so set_a
                    # cannot be empty at that point.
                    re_prob_dict[u] = 1.0 / len(set_a) if u in set_a else 0.0
                break
            if order < 0:
                hi = mid - 1
            else:
                lo = mid + 1

    sum_re_prob = sum(re_prob_dict.values())
    return sum_re_prob, re_prob_dict
def incorrectness_H2_open(aG, sig_list_b, bucket_list_b, bins):
    """Compute per-node re-identification probabilities against ``aG``.

    The H2 classes of ``aG`` are obtained from ``equivalence_class_H2_open``
    and bucketed with ``bucket_H2``.  Each signature in ``sig_list_b`` is
    then looked up among the signatures of ``aG``.  When an equal signature
    is found, every node ``u`` in the matching entry of ``bucket_list_b``
    gets ``1.0 / len(bucket)`` if ``u`` is also in the corresponding bucket
    of ``aG`` and ``0.0`` otherwise.  A signature with no match adds no
    entries.

    The lookup is a binary search driven by ``list_comparator``; it assumes
    the signature list returned by ``bucket_H2`` is ordered consistently
    with that comparator and that the comparator reports equality
    symmetrically (neither is checked here).

    Args:
        aG: graph to score.
        sig_list_b: reference signatures to look up.
        bucket_list_b: node buckets, indexed in parallel with ``sig_list_b``.
        bins: forwarded unchanged to ``bucket_H2``.

    Returns:
        ``(sum_re_prob, re_prob_dict)`` where ``re_prob_dict[u]`` is the
        probability assigned to node ``u`` and ``sum_re_prob`` is the sum of
        all values in that dict.
    """
    h2_list = equivalence_class_H2_open(aG, None)
    cand_size, bin_size, sig_list_a, bucket_list_a = bucket_H2(h2_list, bins)

    re_prob_dict = {}       # re_prob_dict[u] = reidentification probability of u
    for id_b, sig_b in enumerate(sig_list_b):
        lo = 0
        hi = len(sig_list_a) - 1
        # Bounds are tested before indexing, so an empty sig_list_a falls
        # through as "no match" rather than raising IndexError.
        while lo <= hi:
            mid = (lo + hi) // 2    # integer midpoint on both Python 2 and 3
            # A single comparison per probe covers equal / smaller / larger.
            order = list_comparator(sig_b, sig_list_a[mid])
            if order == 0:
                set_a = set(bucket_list_a[mid])
                for u in bucket_list_b[id_b]:
                    # 1.0/len(set_a) is evaluated only for members of set_a,
                    # hence never with an empty set.
                    re_prob_dict[u] = 1.0 / len(set_a) if u in set_a else 0.0
                break
            if order < 0:
                hi = mid - 1
            else:
                lo = mid + 1

    sum_re_prob = sum(re_prob_dict.values())
    return sum_re_prob, re_prob_dict
Esempio n. 6
0
def incorrectness_uncertain_from_file(before_file, after_file, sample_file,
                                      n_samples, bins):

    # compute sig_list_b, bucket_list_b ONCE !
    start = time.clock()
    bG = nx.read_edgelist(before_file, '#', '\t', None, nodetype=int)
    #    G = nx.read_edgelist(after_file, '#', '\t', None, nodetype=int, data=True)
    print "read bG: DONE, elapsed :", time.clock() - start

    h2_list = equivalence_class_H2_open(bG, None)
    cand_size, bin_size, sig_list_b, bucket_list_b = bucket_H2(h2_list, bins)
    #    print "len B:", len(sig_list_b), len(bucket_list_b)

    # H1 score, H2 score
    start = time.clock()
    score_H1 = 0.0
    score_H2 = 0.0
    count = 0
    for i in range(n_samples):
        file_name = sample_file + str(i)
        aG = nx.read_edgelist(file_name,
                              '#',
                              '\t',
                              create_using=nx.MultiGraph(),
                              nodetype=int,
                              data=False)  # IMPORTANT: MultiGraph
        # H1
        sum_re_prob, re_prob_dict = incorrectness_H1(bG, aG, bins)
        score_H1 += sum_re_prob
        # H2
        sum_re_prob, re_prob_dict = incorrectness_H2_open(
            aG, sig_list_b, bucket_list_b, bins)
        score_H2 += sum_re_prob
        print "count =", count
        count += 1
    #
    score_H1 = score_H1 / n_samples
    score_H2 = score_H2 / n_samples
    print "compute score_H1, score_H2: DONE, elapsed :", time.clock() - start

    #
    return score_H1, score_H2
Esempio n. 7
0
def incorrectness_uncertain(before_file, after_file, bins):

    # compute sig_list_b, bucket_list_b ONCE !
    bG = nx.read_edgelist(before_file, '#', '\t', None, nodetype=int)
    G = nx.read_edgelist(after_file, '#', '\t', None, nodetype=int, data=True)

    h2_list = equivalence_class_H2_open(bG, None)
    cand_size, bin_size, sig_list_b, bucket_list_b = bucket_H2(h2_list, bins)
    #    print "len B:", len(sig_list_b), len(bucket_list_b)

    # list of sampled graphs
    g_list = []
    start = time.clock()
    for i in range(N_SAMPLES):
        g_list.append(generate_sample(G))
    print "Sampling graphs - Elapsed ", (time.clock() - start)

    # H1 score
    start = time.clock()
    score_H1 = 0.0
    for aG in g_list:
        sum_re_prob, re_prob_dict = incorrectness_H1(bG, aG, bins)
        score_H1 += sum_re_prob
    score_H1 = score_H1 / N_SAMPLES
    print "compute score_H1: DONE, elapsed :", time.clock() - start

    # H2 score
    score_H2 = 0.0
    count = 0
    for aG in g_list:
        sum_re_prob, re_prob_dict = incorrectness_H2_open(
            aG, sig_list_b, bucket_list_b, bins)
        score_H2 += sum_re_prob
        print "count =", count
        count += 1
    score_H2 = score_H2 / N_SAMPLES

    #
    return score_H1, score_H2
def incorrectness_uncertain(before_file, after_file, bins): 
    
    # compute sig_list_b, bucket_list_b ONCE !
    bG = nx.read_edgelist(before_file, '#', '\t', None, nodetype=int)
    G = nx.read_edgelist(after_file, '#', '\t', None, nodetype=int, data=True)
    
    h2_list = equivalence_class_H2_open(bG, None)
    cand_size, bin_size, sig_list_b, bucket_list_b = bucket_H2(h2_list, bins)
#    print "len B:", len(sig_list_b), len(bucket_list_b)
    
    # list of sampled graphs
    g_list = [] 
    start = time.clock()
    for i in range(N_SAMPLES):
        g_list.append(generate_sample(G))
    print "Sampling graphs - Elapsed ", (time.clock() - start)
    
    # H1 score
    start = time.clock()
    score_H1 = 0.0
    for aG in g_list:
        sum_re_prob, re_prob_dict = incorrectness_H1(bG, aG, bins)
        score_H1 += sum_re_prob
    score_H1 = score_H1/N_SAMPLES
    print "compute score_H1: DONE, elapsed :", time.clock() - start
    
    
    # H2 score
    score_H2 = 0.0
    count = 0
    for aG in g_list:
        sum_re_prob, re_prob_dict = incorrectness_H2_open(aG, sig_list_b, bucket_list_b, bins)
        score_H2 += sum_re_prob
        print "count =", count
        count += 1
    score_H2 = score_H2/N_SAMPLES
    
    # 
    return score_H1, score_H2