コード例 #1
0
def get_result_dataset(att_trees, data, type_alg, k=DEFAULT_K, num_test=10):
    """
    Fix k and QI while varying the size of the dataset.

    Sample sizes grow in steps of 5000 records and always end with the full
    dataset size. For each size, clustering_based_k_anon is run num_test
    times on a fresh random sample, and the averaged NCP and running time
    are printed.

    att_trees, type_alg and k are passed through to clustering_based_k_anon
    unchanged. num_test is the number of runs averaged per sample size.
    """
    data_back = copy.deepcopy(data)
    length = len(data_back)
    print("K=%d" % k)
    joint = 5000
    # Every multiple of `joint` strictly below `length`, then `length` itself.
    datasets = list(range(joint, length, joint))
    datasets.append(length)
    for pos in datasets:
        ncp = rtime = 0
        print('#' * 30)
        print("size of dataset %d" % pos)
        for _run in range(num_test):
            temp = random.sample(data, pos)
            _, eval_result = clustering_based_k_anon(att_trees, temp, type_alg, k)
            # eval_result[0] is reported as NCP, eval_result[1] as run time.
            ncp += eval_result[0]
            rtime += eval_result[1]
            # Sample from a fresh copy next time; presumably the algorithm
            # may modify the sampled records in place -- verify.
            data = copy.deepcopy(data_back)
        ncp /= num_test
        rtime /= num_test
        print("Average NCP %0.2f" % ncp + "%")
        print("Running time %0.2f" % rtime + " seconds")
        print('#' * 30)
コード例 #2
0
def get_result_one(att_trees, data, type_alg, k=DEFAULT_K):
    """
    Run clustering_based_k_anon once and print its evaluation figures.

    k defaults to DEFAULT_K. The first element of the evaluation result is
    printed as NCP (percent) and the second as running time.
    """
    print("K=%d" % k)
    # The input is handed to the algorithm directly; no backup copy is
    # needed because nothing here reads the data afterwards.
    _, eval_result = clustering_based_k_anon(att_trees, data, type_alg, k)
    print("NCP %0.2f" % eval_result[0] + "%")
    print("Running time %0.2f" % eval_result[1] + "seconds")
コード例 #3
0
def get_result_one(att_trees, data, type_alg, k=DEFAULT_K):
    """
    Run clustering_based_k_anon once, save the result and print metrics.

    k defaults to DEFAULT_K. The anonymized result is passed to
    write_to_file; the first element of the evaluation result is printed as
    NCP (percent) and the second as running time.
    """
    print("K=%d" % k)
    # No backup copy of `data` is taken: rebinding the local name after the
    # run would not be visible to the caller, and nothing below reads it.
    result, eval_result = clustering_based_k_anon(att_trees, data, type_alg, k)
    write_to_file(result)
    print("NCP %0.2f" % eval_result[0] + "%")
    print("Running time %0.2f" % eval_result[1] + "seconds")
コード例 #4
0
def get_result_one(att_trees, data, type_alg, k=10):
    """
    Run clustering_based_k_anon once, save the result and print metrics.

    k defaults to 10. The anonymized result is passed to write_to_file; the
    first element of the evaluation result is printed as NCP (percent) and
    the second as running time.
    """
    print("K=%d" % k)
    # No backup copy of `data` is taken: rebinding the local name after the
    # run would not be visible to the caller, and nothing below reads it.
    result, eval_result = clustering_based_k_anon(att_trees, data, type_alg, k)
    write_to_file(result)
    print("NCP %0.2f" % eval_result[0] + "%")
    print("Running time %0.2f" % eval_result[1] + "seconds")
コード例 #5
0
def get_result_k(att_trees, data, type_alg):
    """
    Sweep k from 5 to 50 in steps of 5 on the same dataset.

    For each k the algorithm is run once and its NCP and running time are
    printed.
    """
    pristine = copy.deepcopy(data)
    banner = '#' * 30
    for k in range(5, 55, 5):
        print(banner)
        print("K=%d" % k)
        _, eval_result = clustering_based_k_anon(att_trees, data, type_alg, k)
        # Begin the next round from an untouched copy of the input.
        data = copy.deepcopy(pristine)
        print("NCP %0.2f" % eval_result[0] + "%")
        print("Running time %0.2f" % eval_result[1] + " seconds")
コード例 #6
0
 def test1_oka(self):
     """Run 'oka' with k=2 on a toy dataset; NCP must be within 0.05 of 0."""
     init()
     records = [
         ['6', '1', 'haha'],
         ['6', '1', 'test'],
         ['8', '2', 'haha'],
         ['8', '2', 'test'],
         ['4', '1', 'hha'],
         ['4', '1', 'hha'],
         ['4', '3', 'hha'],
         ['4', '3', 'hha'],
     ]
     result, eval_r = clustering_based_k_anon(ATT_TREE, records, 'oka', 2)
     deviation = abs(eval_r[0] - 0)
     try:
         self.assertTrue(deviation < 0.05)
     except AssertionError:
         # Dump the inputs and outputs for debugging, then force a failure.
         for item in (records, result, eval_r):
             print(item)
         self.assertEqual(0, 1)
コード例 #7
0
 def test2_k_nn(self):
     """Run 'knn' with k=2 on a toy dataset; NCP must be within 0.05 of 2.77."""
     init()
     records = [
         ['6', '1', 'haha'],
         ['6', '1', 'test'],
         ['8', '2', 'haha'],
         ['8', '2', 'test'],
         ['4', '1', 'hha'],
         ['4', '2', 'hha'],
         ['4', '3', 'hha'],
         ['4', '4', 'hha'],
     ]
     result, eval_r = clustering_based_k_anon(ATT_TREE, records, 'knn', 2)
     deviation = abs(eval_r[0] - 2.77)
     try:
         self.assertTrue(deviation < 0.05)
     except AssertionError:
         # Dump the inputs and outputs for debugging, then force a failure.
         for item in (records, result, eval_r):
             print(item)
         self.assertEqual(0, 1)
コード例 #8
0
def get_result_dataset(att_trees, data, type_alg, k=DEFAULT_K, n=10):
    """
    Fix k and QI while varying the size of the dataset.

    Sample sizes grow in steps of 5000 records and always end with the full
    dataset size. For each size, clustering_based_k_anon is run n times on a
    fresh random sample; the result of every run is written to a CSV file
    and the averaged NCP and running time are printed. A summary of all
    averages (rounded to 2 decimals) is printed at the end.

    att_trees, type_alg and k are passed through to clustering_based_k_anon
    unchanged. n is the number of runs averaged per sample size.
    """
    data_back = copy.deepcopy(data)
    length = len(data_back)
    print("K=%d" % k)
    joint = 5000
    # Every multiple of `joint` strictly below `length`, then `length` itself.
    datasets = list(range(joint, length, joint))
    datasets.append(length)
    # NOTE(review): the file name uses the module-level TYPE_ALG rather than
    # the type_alg argument -- confirm that this is intended.
    result_file = FILE_NAME + "_resultfile_" + str(TYPE_ALG) + ".csv"
    all_ncp = []
    all_rtime = []
    for pos in datasets:
        ncp = rtime = 0
        print('#' * 30)
        print("size of dataset %d" % pos)
        for _run in range(n):
            temp = random.sample(data, pos)
            result, eval_result = clustering_based_k_anon(
                att_trees, temp, type_alg, k)
            ncp += eval_result[0]
            rtime += eval_result[1]
            # Sample from a fresh copy next time; presumably the algorithm
            # may modify the sampled records in place -- verify.
            data = copy.deepcopy(data_back)
            # Mode "w" truncates, so only the most recent run stays on disk.
            with open(result_file, "w") as rf:
                for r in result:
                    # Each value is followed by "," (trailing comma kept).
                    rf.write("".join(val + "," for val in r))
                    rf.write("\n")
        ncp /= n
        rtime /= n
        print("Average NCP %0.2f" % ncp + "%")
        all_ncp.append(round(ncp, 2))
        print("Running time %0.2f" % rtime + "seconds")
        all_rtime.append(round(rtime, 2))
    print('#' * 30)
    print("All NCP " + str(all_ncp))
    print("All Running time " + str(all_rtime))
0
def get_result_qi(att_trees, data, type_alg, k=DEFAULT_K):
    """
    Change the number of QI while fixing k and the size of the dataset.

    The QI count runs from len(data[0]) - 1 down to 1; it is passed to
    clustering_based_k_anon as its fifth argument. NCP and running time are
    printed for every count.
    """
    data_back = copy.deepcopy(data)
    num_data = len(data[0])
    # The value printed here is k, so it is labelled "K".
    print("K=%d" % k)
    for i in reversed(range(1, num_data)):
        print('#' * 30)
        print("Number of QI=%d" % i)
        _, eval_result = clustering_based_k_anon(att_trees, data, type_alg, k, i)
        # Begin the next round from an untouched copy of the input.
        data = copy.deepcopy(data_back)
        print("NCP %0.2f" % eval_result[0] + "%")
        print("Running time %0.2f" % eval_result[1] + " seconds")
コード例 #10
0
def get_result_n(att_trees, data, type_alg, k=10, n=10):
    """
    Run clustering_based_k_anon n times and print the averaged metrics.

    k defaults to 10 and n to 10. Every run starts from a fresh deep copy
    of the original data. The first element of each evaluation result is
    accumulated as NCP and the second as running time; both averages are
    printed. n must be non-zero, otherwise the averaging divides by zero.
    """
    print("K=%d" % k)
    data_back = copy.deepcopy(data)
    n_ncp = 0.0
    n_time = 0.0
    for _run in range(n):
        _, eval_result = clustering_based_k_anon(att_trees, data, type_alg, k)
        # Begin the next run from an untouched copy of the input.
        data = copy.deepcopy(data_back)
        n_ncp += eval_result[0]
        n_time += eval_result[1]
    n_ncp = n_ncp / n
    # Average the accumulated running time (not the NCP average).
    n_time = n_time / n
    print("Run %d times" % n)
    print("NCP %0.2f" % n_ncp + "%")
    print("Running time %0.2f" % n_time + " seconds")
コード例 #11
0
 def test2_k_nn(self):
     """Run 'knn' with k=2 on a toy dataset; NCP must be within 0.05 of 2.77."""
     init()
     records = [['6', '1', 'haha'], ['6', '1', 'test'],
                ['8', '2', 'haha'], ['8', '2', 'test'],
                ['4', '1', 'hha'], ['4', '2', 'hha'],
                ['4', '3', 'hha'], ['4', '4', 'hha']]
     result, eval_r = clustering_based_k_anon(ATT_TREE, records, 'knn', 2)
     deviation = abs(eval_r[0] - 2.77)
     try:
         self.assertTrue(deviation < 0.05)
     except AssertionError:
         # Dump the inputs and outputs for debugging, then force a failure.
         for item in (records, result, eval_r):
             print(item)
         self.assertEqual(0, 1)
コード例 #12
0
def get_result_n(att_trees, data, type_alg, k=DEFAULT_K, n=10):
    """
    Run clustering_based_k_anon n times and print the averaged metrics.

    k defaults to DEFAULT_K and n to 10. Every run starts from a fresh deep
    copy of the original data. The first element of each evaluation result
    is accumulated as NCP and the second as running time; both averages are
    printed. n must be non-zero, otherwise the averaging divides by zero.
    """
    print("K=%d" % k)
    data_back = copy.deepcopy(data)
    n_ncp = 0.0
    n_time = 0.0
    for _run in range(n):
        _, eval_result = clustering_based_k_anon(att_trees, data, type_alg, k)
        # Begin the next run from an untouched copy of the input.
        data = copy.deepcopy(data_back)
        n_ncp += eval_result[0]
        n_time += eval_result[1]
    n_ncp = n_ncp / n
    # Average the accumulated running time (not the NCP average).
    n_time = n_time / n
    print("Run %d times" % n)
    print("NCP %0.2f" % n_ncp + "%")
    print("Running time %0.2f" % n_time + " seconds")
コード例 #13
0
def get_result_k(att_trees, data, type_alg):
    """
    Run the algorithm for k in 2, 5, 10, 25, 50, 100 on the same dataset.

    NCP and running time are printed per k; lists of both values, rounded
    to two decimals, are printed at the end.
    """
    pristine = copy.deepcopy(data)
    ncp_log = []
    rtime_log = []
    banner = '#' * 30
    for k in (2, 5, 10, 25, 50, 100):
        print(banner)
        print("K=%d" % k)
        _, eval_result = clustering_based_k_anon(att_trees, data, type_alg, k)
        # Begin the next round from an untouched copy of the input.
        data = copy.deepcopy(pristine)
        print("NCP %0.2f" % eval_result[0] + "%")
        ncp_log.append(round(eval_result[0], 2))
        print("Running time %0.2f" % eval_result[1] + "seconds")
        rtime_log.append(round(eval_result[1], 2))
    print("All NCP" + " " + str(ncp_log))
    print("All Running time" + " " + str(rtime_log))
コード例 #14
0
def get_result_k(att_trees, data, type_alg):
    """
    Evaluate the algorithm for several k values on an unchanged dataset.

    The k values tried are 2, 5, 10, 25, 50 and 100. Each run prints its
    NCP and running time, and the rounded values of all runs are printed
    as two lists once the sweep is finished.
    """
    untouched = copy.deepcopy(data)
    ncp_values, rtime_values = [], []
    k_values = [2, 5, 10, 25, 50, 100]
    for k in k_values:
        print('#' * 30)
        print("K=%d" % k)
        _, eval_result = clustering_based_k_anon(att_trees, data, type_alg, k)
        # Reset the working data before the next k.
        data = copy.deepcopy(untouched)
        print("NCP %0.2f" % eval_result[0] + "%")
        ncp_values.append(round(eval_result[0], 2))
        print("Running time %0.2f" % eval_result[1] + "seconds")
        rtime_values.append(round(eval_result[1], 2))
    print("All NCP" + " " + str(ncp_values))
    print("All Running time" + " " + str(rtime_values))
コード例 #15
0
def get_result_k(att_trees, data, type_alg):
    """
    Run the algorithm for k = 50, 100, ..., 500 on the same dataset.

    NCP and running time are printed per k; lists of both values, rounded
    to two decimals, are printed at the end.
    """
    pristine = copy.deepcopy(data)
    ncp_log = []
    rtime_log = []
    banner = '#' * 30
    for k in (50, 100, 150, 200, 250, 300, 350, 400, 450, 500):
        print(banner)
        print("K=%d" % k)
        _, eval_result = clustering_based_k_anon(att_trees, data, type_alg, k)
        # Begin the next round from an untouched copy of the input.
        data = copy.deepcopy(pristine)
        print("NCP %0.2f" % eval_result[0] + "%")
        ncp_log.append(round(eval_result[0], 2))
        print("Running time %0.2f" % eval_result[1] + "seconds")
        rtime_log.append(round(eval_result[1], 2))
    print("All NCP", ncp_log)
    print("All Running time", rtime_log)
コード例 #16
0
def get_result_qi(att_trees, data, type_alg, k=DEFAULT_K):
    """
    Vary the number of QI while k and the dataset size stay fixed.

    The QI count runs from 1 up to len(data[0]) - 1 and is passed to
    clustering_based_k_anon as its fifth argument. NCP and running time
    are printed per count; lists of both values, rounded to two decimals,
    are printed at the end.
    """
    pristine = copy.deepcopy(data)
    width = len(data[0])
    ncp_log = []
    rtime_log = []
    banner = '#' * 30
    for qi_count in range(1, width):
        print(banner)
        print("Number of QI=%d" % qi_count)
        _, eval_result = clustering_based_k_anon(
            att_trees, data, type_alg, k, qi_count)
        # Begin the next round from an untouched copy of the input.
        data = copy.deepcopy(pristine)
        print("NCP %0.2f" % eval_result[0] + "%")
        ncp_log.append(round(eval_result[0], 2))
        print("Running time %0.2f" % eval_result[1] + "seconds")
        rtime_log.append(round(eval_result[1], 2))
    print("All NCP" + " " + str(ncp_log))
    print("All Running time" + " " + str(rtime_log))
コード例 #17
0
def get_result_qi(att_trees, data, type_alg, k=DEFAULT_K):
    """
    Evaluate the algorithm for an increasing number of QI.

    k and the dataset are held fixed. The QI count goes from 1 to
    len(data[0]) - 1 and is handed to clustering_based_k_anon as the fifth
    argument. Each run prints its NCP and running time; the rounded values
    of all runs are printed as two lists afterwards.
    """
    untouched = copy.deepcopy(data)
    column_count = len(data[0])
    ncp_values, rtime_values = [], []
    for num_qi in range(1, column_count):
        print('#' * 30)
        print("Number of QI=%d" % num_qi)
        _, eval_result = clustering_based_k_anon(
            att_trees, data, type_alg, k, num_qi)
        # Reset the working data before the next QI count.
        data = copy.deepcopy(untouched)
        print("NCP %0.2f" % eval_result[0] + "%")
        ncp_values.append(round(eval_result[0], 2))
        print("Running time %0.2f" % eval_result[1] + "seconds")
        rtime_values.append(round(eval_result[1], 2))
    print("All NCP" + " " + str(ncp_values))
    print("All Running time" + " " + str(rtime_values))
コード例 #18
0
def get_result_dataset(att_trees, data, type_alg, k=DEFAULT_K, n=10):
    """
    Fix k and QI while varying the size of the dataset.

    Sample sizes grow in steps of 5000 records and always end with the full
    dataset size. For each size, clustering_based_k_anon is run n times on a
    fresh random sample and the averaged NCP and running time are printed.
    A summary of all averages (rounded to 2 decimals) is printed at the end.
    """
    pristine = copy.deepcopy(data)
    total = len(pristine)
    print("K=%d" % k)
    step = 5000
    # Number of intermediate sizes; an exact multiple of `step` drops the
    # last one because the full size is appended separately.
    full_steps, remainder = divmod(total, step)
    if remainder == 0:
        full_steps -= 1
    sizes = [step * idx for idx in range(1, full_steps + 1)]
    sizes.append(total)
    ncp_log = []
    rtime_log = []
    for size in sizes:
        ncp = rtime = 0
        print('#' * 30)
        print("size of dataset %d" % size)
        for _run in range(n):
            sample = random.sample(data, size)
            _, eval_result = clustering_based_k_anon(
                att_trees, sample, type_alg, k)
            ncp += eval_result[0]
            rtime += eval_result[1]
            # Draw the next sample from an untouched copy of the input.
            data = copy.deepcopy(pristine)
        ncp /= n
        rtime /= n
        print("Average NCP %0.2f" % ncp + "%")
        ncp_log.append(round(ncp, 2))
        print("Running time %0.2f" % rtime + "seconds")
        rtime_log.append(round(rtime, 2))
    print('#' * 30)
    print("All NCP" + " " + str(ncp_log))
    print("All Running time" + " " + str(rtime_log))