def test_DA_partition_case2(self):
    """Run DA (k=2, m=4) and then AA (k=2, m=2) on the same transactions.

    Both runs must report {'b1': 'B', 'b2': 'B'} at index 2 of the
    evaluation result (presumably the generalization mapping that was
    applied -- confirm against apriori_based_anon).
    """
    init_tree()
    transactions = [
        ['a1'],
        ['a1', 'a2'],
        ['b1', 'b2'],
        ['b1', 'b2'],
        ['a1', 'a2', 'b2'],
        ['a1', 'a2', 'b2'],
        ['a1', 'a2', 'b1', 'b2'],
    ]
    # not {'a1': 'A', 'a2': 'A', 'b1': 'B', 'b2': 'B'}
    expected = {'b1': 'B', 'b2': 'B'}
    for algorithm, k, m in (('DA', 2, 4), ('AA', 2, 2)):
        _, evaluation = apriori_based_anon(
            ATT_TREE, transactions, algorithm, k, m)
        self.assertEqual(evaluation[2], expected)
def test_AA_with_AA_case(self):
    """Run AA and then DA (both k=2, m=2) on the same transactions.

    Both runs must report {'a1': 'A', 'a2': 'A'} at index 2 of the
    evaluation result.
    """
    init_tree()
    transactions = [
        ['a1', 'b1', 'b2'],
        ['a2', 'b1'],
        ['a2', 'b1', 'b2'],
        ['a1', 'a2', 'b2'],
    ]
    expected = {'a1': 'A', 'a2': 'A'}
    for algorithm in ('AA', 'DA'):
        _, evaluation = apriori_based_anon(
            ATT_TREE, transactions, algorithm, 2, 2)
        self.assertEqual(evaluation[2], expected)
def test_DA_partition_case2(self):
    """Run DA with (k=2, m=4) and then (k=2, m=2) on the same transactions.

    Both runs must report {'b1': 'B', 'b2': 'B'} at index 2 of the
    evaluation result (presumably the generalization mapping that was
    applied -- confirm against apriori_based_anon).
    """
    init_tree()
    transactions = [
        ['a1'],
        ['a1', 'a2'],
        ['b1', 'b2'],
        ['b1', 'b2'],
        ['a1', 'a2', 'b2'],
        ['a1', 'a2', 'b2'],
        ['a1', 'a2', 'b1', 'b2'],
    ]
    # not {'a1': 'A', 'a2': 'A', 'b1': 'B', 'b2': 'B'}
    expected = {'b1': 'B', 'b2': 'B'}
    for m in (4, 2):
        _, evaluation = apriori_based_anon(
            ATT_TREE, transactions, 'DA', 2, m)
        self.assertEqual(evaluation[2], expected)
def get_result_dataset(att_tree, data, type_alg='AA', k=DEFALUT_K, num_test=10):
    """
    Fix k while changing the size of the dataset.

    For every multiple of 5000 records that fits into data, draw num_test
    random samples of that size, run apriori_based_anon on each sample and
    print the average NCP and the average running time.

    att_tree and type_alg are passed straight to apriori_based_anon.
    num_test is the number of runs averaged per dataset size.
    """
    print("K=%d" % k)
    print("m=%d" % DEFALUT_M)
    data_back = copy.deepcopy(data)
    length = len(data_back)
    joint = 5000
    # Walk the sample sizes directly: joint, 2 * joint, ... up to length.
    # This visits exactly the sizes the former "length / joint" counter
    # did not skip, and avoids a division whose result is a float on
    # Python 3 (which made range() fail).
    for pos in range(joint, length + 1, joint):
        ncp = rtime = 0
        print('#' * 30)
        print("size of dataset %d" % pos)
        for _run in range(num_test):
            temp = random.sample(data, pos)
            _, eval_result = apriori_based_anon(att_tree, temp, type_alg, k)
            ncp += eval_result[0]
            rtime += eval_result[1]
            # Restore a pristine copy before the next run; presumably
            # apriori_based_anon may modify the sampled records in place.
            data = copy.deepcopy(data_back)
        ncp /= num_test
        rtime /= num_test
        print("Average NCP %0.2f" % ncp + "%")
        print("Running time %0.2f" % rtime + " seconds")
        print('#' * 30)
def get_result_one(att_tree, data, type_alg, k=DEFALUT_K):
    """
    Run apriori_based_anon a single time with the given k
    (DEFALUT_K unless overridden) and print the NCP and running time.
    """
    print("K=%d" % k)
    # Single-argument print calls produce the same output on Python 2 and 3.
    print("%s %d" % ("Size of Data", len(data)))
    print("m=%d" % DEFALUT_M)
    _, eval_result = apriori_based_anon(att_tree, data, type_alg, k)
    print("NCP %0.2f" % eval_result[0] + "%")
    print("Running time %0.2f" % eval_result[1] + " seconds")
def get_result_k(att_tree, data, type_alg):
    """
    Change k while fixing the size of the dataset.

    Runs apriori_based_anon once for each k in [2, 5, 10, 25, 50] and
    prints the NCP and running time of every run.
    """
    data_back = copy.deepcopy(data)
    print("m=%d" % DEFALUT_M)
    # Single-argument print calls produce the same output on Python 2 and 3.
    print("%s %d" % ("Size of Data", len(data)))
    # A finer sweep would be range(5, 105, 5).
    for k in [2, 5, 10, 25, 50]:
        print('#' * 30)
        print("K=%d" % k)
        _, eval_result = apriori_based_anon(att_tree, data, type_alg, k)
        # Restore a pristine copy before the next run; presumably
        # apriori_based_anon may modify the records in place.
        data = copy.deepcopy(data_back)
        print("NCP %0.2f" % eval_result[0] + "%")
        print("Running time %0.2f" % eval_result[1] + " seconds")
def get_result_m(att_tree, data, type_alg, k=DEFALUT_K):
    """
    Change m while fixing k and the size of the dataset.

    Runs apriori_based_anon once for each m in [1, 2, 3, 4, 5, M_MAX] and
    prints the NCP and running time of every run.
    """
    print("K=%d" % k)
    # Single-argument print calls produce the same output on Python 2 and 3.
    print("%s %d" % ("Size of Data", len(data)))
    data_back = copy.deepcopy(data)
    # A finer sweep would be range(1, 100, 5).
    for m in [1, 2, 3, 4, 5, M_MAX]:
        print('#' * 30)
        print("m=%d" % m)
        _, eval_result = apriori_based_anon(att_tree, data, type_alg, k, m)
        # Restore a pristine copy before the next run; presumably
        # apriori_based_anon may modify the records in place.
        data = copy.deepcopy(data_back)
        print("NCP %0.2f" % eval_result[0] + "%")
        print("Running time %0.2f" % eval_result[1] + " seconds")
def T_Gen(trans, k=25, m=2):
    """Generalize the transaction part using the AA algorithm.

    Calls apriori_based_anon with the last tree in ATT_TREES and returns
    its two results as a (result, eval_result) tuple.
    """
    # The last entry of ATT_TREES is the tree used for the transaction part.
    transaction_tree = ATT_TREES[-1]
    generalized, evaluation = apriori_based_anon(
        transaction_tree, trans, 'AA', k, m)
    return generalized, evaluation
def test_DA(self):
    """Run DA with k=2, m=2 and check index 2 of the evaluation result.

    The expected value is {'a1': 'A', 'a2': 'A'}.
    """
    init_tree()
    transactions = [
        ['a1', 'b1', 'b2'],
        ['a2', 'b1'],
        ['a2', 'b1', 'b2'],
        ['a1', 'a2', 'b2'],
    ]
    expected = {'a1': 'A', 'a2': 'A'}
    _, evaluation = apriori_based_anon(ATT_TREE, transactions, 'DA', 2, 2)
    self.assertEqual(evaluation[2], expected)