def test_RMT_with_merge_case(self):
    """Check the RMT evaluation values on six records at two thresholds.

    rt_anon is run with k=2 and m=2.  With threshold 0.5 the first
    evaluation value (RNCP) must be 50, with threshold 0.7 it must be
    200/3; the second value (TNCP) must be 0 in both runs.
    """
    init_tree()
    att_trees = [ATT_TREE, ATT_TREE]
    data = [['a2', ['a1', 'b1']],
            ['a1', ['a1', 'b1']],
            ['b2', ['a2', 'b2']],
            ['b2', ['a2', 'b2']],
            ['b1', ['a1', 'b1']],
            ['b1', ['a1', 'b1']]]
    # delta-based assertAlmostEqual checks |actual - expected| <= delta and
    # reports both values when the check fails.
    _, result = rt_anon(att_trees, data, 'RMT', 2, 2, 0.5)
    self.assertAlmostEqual(result[0], 50, delta=0.001)
    self.assertAlmostEqual(result[1], 0, delta=0.001)
    _, result = rt_anon(att_trees, data, 'RMT', 2, 2, 0.7)
    self.assertAlmostEqual(result[0], 200.0 / 3, delta=0.001)
    self.assertAlmostEqual(result[1], 0, delta=0.001)
def get_result_m(att_tree, data, type_alg, k=DEFAULT_K, threshold=DEFAULT_T):
    """
    Vary m while k, the threshold and the data set stay fixed.

    rt_anon is run once for every m in [1, 2, 3, 4, 5, M_MAX].  The RNCP,
    TNCP and running time of each run are printed as they are produced,
    and the values rounded to two decimals are printed as lists at the end.
    Nothing is returned.
    """
    # Every print below takes one pre-formatted string, so the Python 2
    # print statement and the Python 3 print function emit the same text.
    print("K=%d" % k)
    print("Threshold=%.2f" % threshold)
    print("Size of Data %d" % len(data))
    pristine = copy.deepcopy(data)
    m_values = [1, 2, 3, 4, 5, M_MAX]
    rncp_log, tncp_log, time_log = [], [], []
    for m in m_values:
        print('#' * 30)
        print("m=%d" % m)
        anonymized, metrics = rt_anon(att_tree, data, type_alg, k, m, threshold)
        # save_to_file((att_tree, data, anonymized, k, m))
        # Continue with a fresh copy of the input records (presumably
        # because rt_anon may modify what it is given -- confirm).
        data = copy.deepcopy(pristine)
        print("RNCP %0.2f%%" % metrics[0])
        rncp_log.append(round(metrics[0], 2))
        print("TNCP %0.2f%%" % metrics[1])
        tncp_log.append(round(metrics[1], 2))
        print("Running time %0.2f seconds" % metrics[2])
        time_log.append(round(metrics[2], 2))
    print("m range %s" % (m_values,))
    print("RNCP %s" % (rncp_log,))
    print("TNCP %s" % (tncp_log,))
    print("Running time %s" % (time_log,))
def get_result_k(att_tree, data, type_alg, m=DEFAULT_M, threshold=DEFAULT_T):
    """
    Vary k while m, the threshold and the data set stay fixed.

    rt_anon is run once for every k in [2, 5, 10, 25, 50, 100].  The RNCP,
    TNCP and running time of each run are printed as they are produced,
    and the values rounded to two decimals are printed as lists at the end.
    Nothing is returned.
    """
    snapshot = copy.deepcopy(data)
    # Every print below takes one pre-formatted string, so the Python 2
    # print statement and the Python 3 print function emit the same text.
    print("m=%d" % m)
    print("Threshold=%.2f" % threshold)
    print("Size of Data %d" % len(data))
    k_values = [2, 5, 10, 25, 50, 100]
    rncp_history = []
    tncp_history = []
    runtime_history = []
    for k in k_values:
        print('#' * 30)
        print("K=%d" % k)
        anonymized, scores = rt_anon(att_tree, data, type_alg, k, m, threshold)
        # save_to_file((att_tree, data, anonymized, k, m))
        # Continue with a fresh copy of the input records (presumably
        # because rt_anon may modify what it is given -- confirm).
        data = copy.deepcopy(snapshot)
        print("RNCP %0.2f%%" % scores[0])
        rncp_history.append(round(scores[0], 2))
        print("TNCP %0.2f%%" % scores[1])
        tncp_history.append(round(scores[1], 2))
        print("Running time %0.2f seconds" % scores[2])
        runtime_history.append(round(scores[2], 2))
    print("K range %s" % (k_values,))
    print("RNCP %s" % (rncp_history,))
    print("TNCP %s" % (tncp_history,))
    print("Running time %s" % (runtime_history,))
def test_RMR_RMT_with_self_case(self):
    """Check that RMR and RMT give the same evaluation values on four records.

    For each algorithm rt_anon is run with k=2 and m=2, first with
    threshold 1.0 (second evaluation value, TNCP, must be 0.5 * 5 / 11 * 100)
    and then with threshold 0.1 (RNCP must be 0 and TNCP must be 50).
    """
    init_tree()
    att_trees = [ATT_TREE, ATT_TREE]
    data = [['a1', ['a1', 'b1', 'b2']],
            ['a1', ['a2', 'b1']],
            ['a2', ['a2', 'b1', 'b2']],
            ['a2', ['a1', 'a2', 'b2']]]
    # Both algorithms are held to identical expectations, so the assertions
    # are written once; the calls run in the order RMR(1.0), RMR(0.1),
    # RMT(1.0), RMT(0.1).
    for type_alg in ('RMR', 'RMT'):
        _, result = rt_anon(att_trees, data, type_alg, 2, 2, 1.0)
        # delta-based assertAlmostEqual checks |actual - expected| <= delta
        # and reports both values when the check fails.
        self.assertAlmostEqual(result[1], 0.5 * 5 / 11 * 100, delta=0.001)
        _, result = rt_anon(att_trees, data, type_alg, 2, 2, 0.1)
        self.assertAlmostEqual(result[0], 0, delta=0.001)
        self.assertAlmostEqual(result[1], 50, delta=0.001)
def test_RMR_with_full_threshold(self):
    """Check the RMR transaction loss on seven records with threshold 1.0.

    rt_anon is run with k=2 and m=2; the second evaluation value (TNCP)
    must equal 8 * 0.5 * 100 / 17.
    """
    init_tree()
    att_trees = [ATT_TREE, ATT_TREE]
    data = [['a1', ['a1']],
            ['a1', ['a1', 'a2']],
            ['b1', ['b1', 'b2']],
            ['b2', ['b1', 'b2']],
            ['b1', ['a1', 'a2', 'b2']],
            ['b2', ['a1', 'a2', 'b2']],
            ['a1', ['a1', 'a2', 'b1', 'b2']]]
    _, result = rt_anon(att_trees, data, 'RMR', 2, 2, 1.0)
    # delta-based assertAlmostEqual checks |actual - expected| <= delta and
    # reports both values when the check fails.
    self.assertAlmostEqual(result[1], 8 * 0.5 * 100 / 17, delta=0.001)
def get_result_one(att_tree, data, type_alg, k=DEFAULT_K, m=DEFAULT_M, threshold=DEFAULT_T):
    """
    Run rt_anon a single time with the given k, m and threshold.

    The parameters and the size of the data set are printed first, then
    the RNCP, TNCP and running time reported by rt_anon.  Nothing is
    returned.
    """
    # Every print below takes one pre-formatted string, so the Python 2
    # print statement and the Python 3 print function emit the same text.
    print("K=%d" % k)
    print("Size of Data %d" % len(data))
    print("m=%d" % m)
    print("Threshold=%.2f" % threshold)
    anonymized, metrics = rt_anon(att_tree, data, type_alg, k, m, threshold)
    # save_to_file((att_tree, data, anonymized, k, m))
    print("RNCP %0.2f%%" % metrics[0])
    print("TNCP %0.2f%%" % metrics[1])
    print("Running time %0.2f seconds" % metrics[2])
def get_result_dataset(att_tree, data, type_alg='RMR', k=DEFAULT_K, m=DEFAULT_M, threshold=DEFAULT_T, num_test=10):
    """
    Fix k, m and the threshold while changing the size of the data set.

    For every multiple of 5000 that is strictly smaller than len(data),
    num_test random samples of that size are drawn from data and passed to
    rt_anon.  The RNCP, TNCP and running time averaged over the num_test
    runs are printed per size, and the averages rounded to two decimals
    are printed as lists at the end.  Nothing is returned.

    num_test is the number of random samples evaluated per size.  A
    ZeroDivisionError is raised if it is 0 while at least one size is
    tested.
    """
    # Every print below takes one pre-formatted string, so the Python 2
    # print statement and the Python 3 print function emit the same text.
    print("K=%d" % k)
    print("m=%d" % m)
    print("Threshold=%.2f" % threshold)
    data_back = copy.deepcopy(data)
    length = len(data_back)
    joint = 5000
    # joint, 2 * joint, ... stopping before length: the full data set is
    # never one of the tested sizes.  Stepping with range() avoids integer
    # division, so it does not depend on Python 2 '/' semantics.
    datasets = list(range(joint, length, joint))
    all_rncp = []
    all_tncp = []
    all_rtime = []
    for pos in datasets:
        rncp = tncp = rtime = 0
        print('#' * 30)
        print("size of dataset %d" % pos)
        for j in range(num_test):
            temp = random.sample(data, pos)
            result, eval_result = rt_anon(att_tree, temp, type_alg, k, m, threshold)
            # save_to_file((att_tree, temp, result, k, m), number=j)
            rncp += eval_result[0]
            tncp += eval_result[1]
            rtime += eval_result[2]
            # Draw the next sample from a fresh copy of the input records
            # (presumably because rt_anon may modify the sampled records,
            # which are shared with data -- confirm).
            data = copy.deepcopy(data_back)
        # Divide by a float so the averages are never truncated by
        # Python 2 integer division.
        runs = float(num_test)
        rncp /= runs
        tncp /= runs
        rtime /= runs
        print("RNCP %0.2f%%" % rncp)
        all_rncp.append(round(rncp, 2))
        print("TNCP %0.2f%%" % tncp)
        all_tncp.append(round(tncp, 2))
        print("Running time %0.2f seconds" % rtime)
        all_rtime.append(round(rtime, 2))
    print("Size of datasets %s" % (datasets,))
    print("RNCP %s" % (all_rncp,))
    print("TNCP %s" % (all_tncp,))
    print("Running time %s" % (all_rtime,))
def test_RMR_with_merge_case(self):
    """Check the RMR evaluation values on eight records with threshold 0.5.

    rt_anon is run with k=2 and m=2; the first evaluation value (RNCP)
    must be 50 and the second (TNCP) must be 0.5 * 5 / 23 * 100.
    """
    init_tree()
    att_trees = [ATT_TREE, ATT_TREE]
    data = [['a1', ['a1', 'b1', 'b2']],
            ['a1', ['a2', 'b1']],
            ['a2', ['a2', 'b1', 'b2']],
            ['a2', ['a1', 'a2', 'b2']],
            ['b1', ['a2', 'b1', 'b2']],
            ['b1', ['a1', 'a2', 'b2']],
            ['b2', ['a2', 'b1', 'b2']],
            ['b2', ['a1', 'a2', 'b2']]]
    _, result = rt_anon(att_trees, data, 'RMR', 2, 2, 0.5)
    # delta-based assertAlmostEqual checks |actual - expected| <= delta and
    # reports both values when the check fails.
    self.assertAlmostEqual(result[0], 50, delta=0.001)
    self.assertAlmostEqual(result[1], 0.5 * 5 / 23 * 100, delta=0.001)