def get_result_dataset(att_trees, data, type_alg, k=DEFAULT_K, num_test=10):
    """
    Fix k and the QI, while changing the size of the dataset.

    The dataset sizes tested are the multiples of 5000 records below
    len(data), followed by len(data) itself. For each size the algorithm
    is run num_test times on a random sample of that size and the average
    NCP and running time are printed.

    att_trees, type_alg and k are passed through unchanged to
    clustering_based_k_anon; num_test is the number of runs per size.
    """
    data_back = copy.deepcopy(data)
    length = len(data_back)
    print("K=%d" % k)
    joint = 5000
    # Floor division keeps the step count an int on Python 3 as well
    # (range() rejects floats).
    check_time = length // joint
    if length % joint == 0:
        # The full length is appended below; do not list it twice.
        check_time -= 1
    datasets = [joint * (i + 1) for i in range(check_time)]
    datasets.append(length)
    for pos in datasets:
        ncp = rtime = 0
        print('#' * 30)
        print("size of dataset %d" % pos)
        for _ in range(num_test):
            temp = random.sample(data, pos)
            _, eval_result = clustering_based_k_anon(att_trees, temp, type_alg, k)
            ncp += eval_result[0]
            rtime += eval_result[1]
            # Start the next run from the untouched backup in case the
            # previous run modified the sampled records.
            data = copy.deepcopy(data_back)
        ncp /= num_test
        rtime /= num_test
        print("Average NCP %0.2f" % ncp + "%")
        print("Running time %0.2f" % rtime + " seconds")
        print('#' * 30)
def get_result_one(att_trees, data, type_alg, k=DEFAULT_K):
    """
    Run clustering_based_k_anon one time (k defaults to DEFAULT_K) and
    print the NCP and the running time it reports.
    """
    print("K=%d" % k)
    # No backup copy of data is taken: there is only one run and data is
    # not read again afterwards.
    _, eval_result = clustering_based_k_anon(att_trees, data, type_alg, k)
    print("NCP %0.2f" % eval_result[0] + "%")
    print("Running time %0.2f" % eval_result[1] + "seconds")
def get_result_one(att_trees, data, type_alg, k=DEFAULT_K):
    """
    Run clustering_based_k_anon one time (k defaults to DEFAULT_K), hand
    the anonymized result to write_to_file, and print the NCP and the
    running time reported by the run.
    """
    print("K=%d" % k)
    # No backup/restore of data: rebinding the local name after the single
    # run had no effect for the caller and the copy was never read.
    result, eval_result = clustering_based_k_anon(att_trees, data, type_alg, k)
    write_to_file(result)
    print("NCP %0.2f" % eval_result[0] + "%")
    print("Running time %0.2f" % eval_result[1] + "seconds")
def get_result_one(att_trees, data, type_alg, k=10):
    """
    Run clustering_based_k_anon one time (k defaults to 10), hand the
    anonymized result to write_to_file, and print the NCP and the running
    time reported by the run.
    """
    print("K=%d" % k)
    # No backup/restore of data: rebinding the local name after the single
    # run had no effect for the caller and the copy was never read.
    result, eval_result = clustering_based_k_anon(att_trees, data, type_alg, k)
    write_to_file(result)
    print("NCP %0.2f" % eval_result[0] + "%")
    print("Running time %0.2f" % eval_result[1] + "seconds")
def get_result_k(att_trees, data, type_alg, k_values=None):
    """
    Change k, while fixing the QI and the size of the dataset.

    Runs clustering_based_k_anon once per value of k and prints the NCP
    and running time of every run.

    k_values is an optional iterable of k values to test; when omitted
    the sweep is 5, 10, ..., 50.
    """
    if k_values is None:
        k_values = range(5, 55, 5)
    data_back = copy.deepcopy(data)
    for k in k_values:
        print('#' * 30)
        print("K=%d" % k)
        _, eval_result = clustering_based_k_anon(att_trees, data, type_alg, k)
        # Start the next run from the untouched backup in case this run
        # modified its input.
        data = copy.deepcopy(data_back)
        print("NCP %0.2f" % eval_result[0] + "%")
        print("Running time %0.2f" % eval_result[1] + " seconds")
def test1_oka(self):
    """The 'oka' algorithm with k=2 must report an NCP within 0.05 of 0."""
    init()
    data = [['6', '1', 'haha'],
            ['6', '1', 'test'],
            ['8', '2', 'haha'],
            ['8', '2', 'test'],
            ['4', '1', 'hha'],
            ['4', '1', 'hha'],
            ['4', '3', 'hha'],
            ['4', '3', 'hha']]
    result, eval_r = clustering_based_k_anon(ATT_TREE, data, 'oka', 2)
    # Put the debugging context into the failure message itself instead of
    # printing it and then forcing an unrelated "0 != 1" failure.
    self.assertTrue(
        abs(eval_r[0] - 0) < 0.05,
        "NCP out of tolerance: data=%r result=%r eval_r=%r"
        % (data, result, eval_r))
def test2_k_nn(self):
    """The 'knn' algorithm with k=2 must report an NCP within 0.05 of 2.77."""
    init()
    data = [['6', '1', 'haha'],
            ['6', '1', 'test'],
            ['8', '2', 'haha'],
            ['8', '2', 'test'],
            ['4', '1', 'hha'],
            ['4', '2', 'hha'],
            ['4', '3', 'hha'],
            ['4', '4', 'hha']]
    result, eval_r = clustering_based_k_anon(ATT_TREE, data, 'knn', 2)
    # Put the debugging context into the failure message itself instead of
    # printing it and then forcing an unrelated "0 != 1" failure.
    self.assertTrue(
        abs(eval_r[0] - 2.77) < 0.05,
        "NCP out of tolerance: data=%r result=%r eval_r=%r"
        % (data, result, eval_r))
def get_result_dataset(att_trees, data, type_alg, k=DEFAULT_K, n=10):
    """
    Fix k and the QI, while changing the size of the dataset.

    The dataset sizes tested are the multiples of 5000 records below
    len(data), followed by len(data) itself. For each size the algorithm
    is run n times on a random sample of that size; the average NCP and
    running time are printed and collected, and the collected averages
    are printed at the end.

    The anonymized result of each run is also written as comma-separated
    lines to FILE_NAME + "_resultfile_" + str(TYPE_ALG) + ".csv". The file
    is opened in "w" mode, so every run overwrites the previous output.

    att_trees, type_alg and k are passed through unchanged to
    clustering_based_k_anon; n is the number of runs per size.
    """
    data_back = copy.deepcopy(data)
    length = len(data_back)
    print("K=%d" % k)
    joint = 5000
    # Floor division keeps the step count an int on Python 3 as well
    # (range() rejects floats).
    check_time = length // joint
    if length % joint == 0:
        # The full length is appended below; do not list it twice.
        check_time -= 1
    datasets = [joint * (i + 1) for i in range(check_time)]
    datasets.append(length)
    all_ncp = []
    all_rtime = []
    sep = ","
    for pos in datasets:
        ncp = rtime = 0
        print('#' * 30)
        print("size of dataset %d" % pos)
        for _ in range(n):
            temp = random.sample(data, pos)
            result, eval_result = clustering_based_k_anon(
                att_trees, temp, type_alg, k)
            ncp += eval_result[0]
            rtime += eval_result[1]
            # Start the next run from the untouched backup in case the
            # previous run modified the sampled records.
            data = copy.deepcopy(data_back)
            # NOTE(review): the file name uses the module-level TYPE_ALG,
            # not the type_alg argument -- confirm that is intended.
            result_file = FILE_NAME + "_resultfile_" + str(TYPE_ALG) + ".csv"
            with open(result_file, "w") as rf:
                for r in result:
                    # Every value, including the last one, is followed by
                    # the separator (the output format has a trailing comma).
                    rf.write("".join(val + sep for val in r))
                    rf.write("\n")
        ncp /= n
        rtime /= n
        print("Average NCP %0.2f" % ncp + "%")
        all_ncp.append(round(ncp, 2))
        print("Running time %0.2f" % rtime + "seconds")
        all_rtime.append(round(rtime, 2))
        print('#' * 30)
    # "%s %s" reproduces the output of the two-argument print statement
    # on both Python 2 and Python 3.
    print("%s %s" % ("All NCP", all_ncp))
    print("%s %s" % ("All Running time", all_rtime))
def get_result_qi(att_trees, data, type_alg, k=DEFAULT_K):
    """
    Change the number of QI, while fixing k and the size of the dataset.

    The number of QI runs from len(data[0]) - 1 down to 1; for each value
    clustering_based_k_anon is run once and its NCP and running time are
    printed.
    """
    data_back = copy.deepcopy(data)
    num_data = len(data[0])
    # The parameter is k; the header previously labelled it "L".
    print("K=%d" % k)
    for i in range(num_data - 1, 0, -1):
        print('#' * 30)
        print("Number of QI=%d" % i)
        _, eval_result = clustering_based_k_anon(att_trees, data, type_alg, k, i)
        # Start the next run from the untouched backup in case this run
        # modified its input.
        data = copy.deepcopy(data_back)
        print("NCP %0.2f" % eval_result[0] + "%")
        print("Running time %0.2f" % eval_result[1] + " seconds")
def get_result_n(att_trees, data, type_alg, k=10, n=10):
    """
    Run clustering_based_k_anon n times (k defaults to 10) and print the
    average NCP and the average running time over those runs.
    """
    print("K=%d" % k)
    data_back = copy.deepcopy(data)
    n_ncp = 0.0
    n_time = 0.0
    for _ in range(n):
        _, eval_result = clustering_based_k_anon(att_trees, data, type_alg, k)
        # Start the next run from the untouched backup in case this run
        # modified its input.
        data = copy.deepcopy(data_back)
        n_ncp += eval_result[0]
        n_time += eval_result[1]
    n_ncp = n_ncp / n
    # Average the accumulated running time (previously this divided the
    # already-averaged NCP, so the reported time was wrong).
    n_time = n_time / n
    print("Run %d times" % n)
    print("NCP %0.2f" % n_ncp + "%")
    print("Running time %0.2f" % n_time + " seconds")
def test2_k_nn(self):
    """The 'knn' algorithm with k=2 must report an NCP within 0.05 of 2.77."""
    init()
    data = [['6', '1', 'haha'],
            ['6', '1', 'test'],
            ['8', '2', 'haha'],
            ['8', '2', 'test'],
            ['4', '1', 'hha'],
            ['4', '2', 'hha'],
            ['4', '3', 'hha'],
            ['4', '4', 'hha']]
    result, eval_r = clustering_based_k_anon(ATT_TREE, data, 'knn', 2)
    # Put the debugging context into the failure message itself instead of
    # printing it and then forcing an unrelated "0 != 1" failure.
    self.assertTrue(
        abs(eval_r[0] - 2.77) < 0.05,
        "NCP out of tolerance: data=%r result=%r eval_r=%r"
        % (data, result, eval_r))
def get_result_n(att_trees, data, type_alg, k=DEFAULT_K, n=10):
    """
    Run clustering_based_k_anon n times (k defaults to DEFAULT_K) and
    print the average NCP and the average running time over those runs.
    """
    print("K=%d" % k)
    data_back = copy.deepcopy(data)
    n_ncp = 0.0
    n_time = 0.0
    for _ in range(n):
        _, eval_result = clustering_based_k_anon(att_trees, data, type_alg, k)
        # Start the next run from the untouched backup in case this run
        # modified its input.
        data = copy.deepcopy(data_back)
        n_ncp += eval_result[0]
        n_time += eval_result[1]
    n_ncp = n_ncp / n
    # Average the accumulated running time (previously this divided the
    # already-averaged NCP, so the reported time was wrong).
    n_time = n_time / n
    print("Run %d times" % n)
    print("NCP %0.2f" % n_ncp + "%")
    print("Running time %0.2f" % n_time + " seconds")
def get_result_k(att_trees, data, type_alg, k_values=None):
    """
    Change k, while fixing the QI and the size of the dataset.

    Runs clustering_based_k_anon once per value of k, prints the NCP and
    running time of every run, and finally prints the lists of all NCP
    and running-time values rounded to two decimals.

    k_values is an optional iterable of k values to test; when omitted
    the sweep is 2, 5, 10, 25, 50, 100.
    """
    if k_values is None:
        k_values = (2, 5, 10, 25, 50, 100)
    data_back = copy.deepcopy(data)
    all_ncp = []
    all_rtime = []
    for k in k_values:
        print('#' * 30)
        print("K=%d" % k)
        _, eval_result = clustering_based_k_anon(att_trees, data, type_alg, k)
        # Start the next run from the untouched backup in case this run
        # modified its input.
        data = copy.deepcopy(data_back)
        print("NCP %0.2f" % eval_result[0] + "%")
        all_ncp.append(round(eval_result[0], 2))
        print("Running time %0.2f" % eval_result[1] + "seconds")
        all_rtime.append(round(eval_result[1], 2))
    # "%s %s" reproduces the output of the two-argument print statement
    # on both Python 2 and Python 3.
    print("%s %s" % ("All NCP", all_ncp))
    print("%s %s" % ("All Running time", all_rtime))
def get_result_k(att_trees, data, type_alg, k_values=None):
    """
    Change k, while fixing the QI and the size of the dataset.

    Runs clustering_based_k_anon once per value of k, prints the NCP and
    running time of every run, and finally prints the lists of all NCP
    and running-time values rounded to two decimals.

    k_values is an optional iterable of k values to test; when omitted
    the sweep is 50, 100, ..., 500.
    """
    if k_values is None:
        k_values = (50, 100, 150, 200, 250, 300, 350, 400, 450, 500)
    data_back = copy.deepcopy(data)
    all_ncp = []
    all_rtime = []
    for k in k_values:
        print('#' * 30)
        print("K=%d" % k)
        _, eval_result = clustering_based_k_anon(att_trees, data, type_alg, k)
        # Start the next run from the untouched backup in case this run
        # modified its input.
        data = copy.deepcopy(data_back)
        print("NCP %0.2f" % eval_result[0] + "%")
        all_ncp.append(round(eval_result[0], 2))
        print("Running time %0.2f" % eval_result[1] + "seconds")
        all_rtime.append(round(eval_result[1], 2))
    print("All NCP", all_ncp)
    print("All Running time", all_rtime)
def get_result_qi(att_trees, data, type_alg, k=DEFAULT_K):
    """
    Vary the number of QI while k and the size of the dataset stay fixed.

    The number of QI runs from 1 up to len(data[0]) - 1. Each setting is
    run once through clustering_based_k_anon; its NCP and running time
    are printed, and the two-decimal roundings of all of them are printed
    as lists at the end.
    """
    pristine = copy.deepcopy(data)
    qi_limit = len(data[0])
    ncp_history, rtime_history = [], []
    banner = '#' * 30
    for qi_num in range(1, qi_limit):
        print(banner)
        print("Number of QI=%d" % qi_num)
        _, evaluation = clustering_based_k_anon(
            att_trees, data, type_alg, k, qi_num)
        # Hand the next run a fresh copy of the original records.
        data = copy.deepcopy(pristine)
        print("NCP %0.2f" % evaluation[0] + "%")
        ncp_history.append(round(evaluation[0], 2))
        print("Running time %0.2f" % evaluation[1] + "seconds")
        rtime_history.append(round(evaluation[1], 2))
    # "%s %s" yields the same text as the two-argument print statement.
    print("%s %s" % ("All NCP", ncp_history))
    print("%s %s" % ("All Running time", rtime_history))
def get_result_dataset(att_trees, data, type_alg, k=DEFAULT_K, n=10):
    """
    Fix k and the QI, while changing the size of the dataset.

    The dataset sizes tested are the multiples of 5000 records below
    len(data), followed by len(data) itself. For each size the algorithm
    is run n times on a random sample of that size; the average NCP and
    running time are printed and collected, and the collected averages
    (rounded to two decimals) are printed at the end.

    att_trees, type_alg and k are passed through unchanged to
    clustering_based_k_anon; n is the number of runs per size.
    """
    data_back = copy.deepcopy(data)
    length = len(data_back)
    print("K=%d" % k)
    joint = 5000
    # Floor division keeps the step count an int on Python 3 as well
    # (range() rejects floats).
    check_time = length // joint
    if length % joint == 0:
        # The full length is appended below; do not list it twice.
        check_time -= 1
    datasets = [joint * (i + 1) for i in range(check_time)]
    datasets.append(length)
    all_ncp = []
    all_rtime = []
    for pos in datasets:
        ncp = rtime = 0
        print('#' * 30)
        print("size of dataset %d" % pos)
        for _ in range(n):
            temp = random.sample(data, pos)
            _, eval_result = clustering_based_k_anon(
                att_trees, temp, type_alg, k)
            ncp += eval_result[0]
            rtime += eval_result[1]
            # Start the next run from the untouched backup in case the
            # previous run modified the sampled records.
            data = copy.deepcopy(data_back)
        ncp /= n
        rtime /= n
        print("Average NCP %0.2f" % ncp + "%")
        all_ncp.append(round(ncp, 2))
        print("Running time %0.2f" % rtime + "seconds")
        all_rtime.append(round(rtime, 2))
        print('#' * 30)
    # "%s %s" reproduces the output of the two-argument print statement
    # on both Python 2 and Python 3.
    print("%s %s" % ("All NCP", all_ncp))
    print("%s %s" % ("All Running time", all_rtime))