def test_RMT_with_merge_case(self):
     """
     Run rt_anon with the 'RMT' algorithm (k=2, m=2) on six fixed records
     at thresholds 0.5 and 0.7, and check the first two evaluation values
     (printed as RNCP and TNCP by the get_result_* helpers in this file)
     to within 0.001.
     """
     init_tree()
     trees = [ATT_TREE, ATT_TREE]
     records = [['a2', ['a1', 'b1']],
                ['a1', ['a1', 'b1']],
                ['b2', ['a2', 'b2']],
                ['b2', ['a2', 'b2']],
                ['b1', ['a1', 'b1']],
                ['b1', ['a1', 'b1']]]
     tolerance = 0.001
     # (threshold, expected metrics[0], expected metrics[1])
     expectations = ((0.5, 50, 0),
                     (0.7, 200.0 / 3, 0))
     for threshold, want_first, want_second in expectations:
         _, metrics = rt_anon(trees, records, 'RMT', 2, 2, threshold)
         self.assertTrue(abs(metrics[0] - want_first) <= tolerance)
         self.assertTrue(abs(metrics[1] - want_second) <= tolerance)
def get_result_m(att_tree, data, type_alg, k=DEFAULT_K, threshold=DEFAULT_T):
    """
    change k, whle fixing size of dataset
    """
    print "K=%d" % k
    print "Threshold=%.2f" % threshold
    print "Size of Data", len(data)
    data_back = copy.deepcopy(data)
    # for m in range(1, 100, 5):
    all_rncp = []
    all_tncp = []
    all_rtime = []
    m_range = [1, 2, 3, 4, 5, M_MAX]
    for m in m_range:
        print '#' * 30
        print "m=%d" % m
        result, eval_result = rt_anon(att_tree, data, type_alg, k, m, threshold)
        # save_to_file((att_tree, data, result, k, m))
        data = copy.deepcopy(data_back)
        print "RNCP %0.2f" % eval_result[0] + "%"
        all_rncp.append(round(eval_result[0], 2))
        print "TNCP %0.2f" % eval_result[1] + "%"
        all_tncp.append(round(eval_result[1], 2))
        print "Running time %0.2f" % eval_result[2] + " seconds"
        all_rtime.append(round(eval_result[2], 2))
    print "m range", m_range
    print "RNCP", all_rncp
    print "TNCP", all_tncp
    print "Running time", all_rtime
def get_result_k(att_tree, data, type_alg, m=DEFAULT_M, threshold=DEFAULT_T):
    """
    change k, whle fixing size of dataset
    """
    data_back = copy.deepcopy(data)
    # for k in range(5, 105, 5):
    print "m=%d" % m
    print "Threshold=%.2f" % threshold
    print "Size of Data", len(data)
    all_rncp = []
    all_tncp = []
    all_rtime = []
    # for k in range(5, 55, 5):
    #     if k in [2, 5, 10, 25, 50, 100]:
    #         continue
    k_range = [2, 5, 10, 25, 50, 100]
    for k in k_range:
        print '#' * 30
        print "K=%d" % k
        result, eval_result = rt_anon(att_tree, data, type_alg, k, m, threshold)
        # save_to_file((att_tree, data, result, k, m))
        data = copy.deepcopy(data_back)
        print "RNCP %0.2f" % eval_result[0] + "%"
        all_rncp.append(round(eval_result[0], 2))
        print "TNCP %0.2f" % eval_result[1] + "%"
        all_tncp.append(round(eval_result[1], 2))
        print "Running time %0.2f" % eval_result[2] + " seconds"
        all_rtime.append(round(eval_result[2], 2))
    print "K range", k_range
    print "RNCP", all_rncp
    print "TNCP", all_tncp
    print "Running time", all_rtime
 def test_RMR_RMT_with_self_case(self):
     """
     Run the same four records through 'RMR' and then 'RMT' (k=2, m=2),
     each at threshold 1.0 and then 0.1, and check that both algorithms
     produce the same expected evaluation values to within 0.001.
     """
     init_tree()
     trees = [ATT_TREE, ATT_TREE]
     records = [['a1', ['a1', 'b1', 'b2']],
                ['a1', ['a2', 'b1']],
                ['a2', ['a2', 'b1', 'b2']],
                ['a2', ['a1', 'a2', 'b2']]]
     tolerance = 0.001
     for algorithm in ('RMR', 'RMT'):
         # Threshold 1.0: only the second evaluation value is checked.
         _, metrics = rt_anon(trees, records, algorithm, 2, 2, 1.0)
         self.assertTrue(abs(metrics[1] - 0.5 * 5 / 11 * 100) <= tolerance)
         # Threshold 0.1: both evaluation values are checked.
         _, metrics = rt_anon(trees, records, algorithm, 2, 2, 0.1)
         self.assertTrue(abs(metrics[0] - 0) <= tolerance)
         self.assertTrue(abs(metrics[1] - 50) <= tolerance)
 def test_RMR_with_full_threshold(self):
     """
     Run 'RMR' (k=2, m=2) with threshold 1.0 on seven fixed records and
     check the second evaluation value against 8 * 0.5 * 100 / 17 to
     within 0.001.
     """
     init_tree()
     trees = [ATT_TREE, ATT_TREE]
     records = [['a1', ['a1']],
                ['a1', ['a1', 'a2']],
                ['b1', ['b1', 'b2']],
                ['b2', ['b1', 'b2']],
                ['b1', ['a1', 'a2', 'b2']],
                ['b2', ['a1', 'a2', 'b2']],
                ['a1', ['a1', 'a2', 'b1', 'b2']]]
     expected_second = 8 * 0.5 * 100 / 17
     _, metrics = rt_anon(trees, records, 'RMR', 2, 2, 1.0)
     deviation = abs(metrics[1] - expected_second)
     self.assertTrue(deviation <= 0.001)
def get_result_one(att_tree, data, type_alg, k=DEFAULT_K, m=DEFAULT_M, threshold=DEFAULT_T):
    """
    run RT_ANON for one time, with k=10
    """
    print "K=%d" % k
    print "Size of Data", len(data)
    print "m=%d" % m
    print "Threshold=%.2f" % threshold
    result, eval_result = rt_anon(att_tree, data, type_alg, k, m, threshold)
    # save_to_file((att_tree, data, result, k, m))
    print "RNCP %0.2f" % eval_result[0] + "%"
    print "TNCP %0.2f" % eval_result[1] + "%"
    print "Running time %0.2f" % eval_result[2] + " seconds"
def get_result_dataset(att_tree, data, type_alg='RMR',
                       k=DEFAULT_K, m=DEFAULT_M, threshold=DEFAULT_T, num_test=10):
    """
    fix k, while changing size of dataset
    num_test is the test nubmber.
    """
    print "K=%d" % k
    print "m=%d" % m
    print "Threshold=%.2f" % threshold
    data_back = copy.deepcopy(data)
    length = len(data_back)
    joint = 5000
    datasets = []
    check_time = length / joint
    if length % joint == 0:
        check_time -= 1
    for i in range(check_time):
        datasets.append(joint * (i + 1))
    # datasets.append(length)
    all_rncp = []
    all_tncp = []
    all_rtime = []
    for pos in datasets:
        rncp = tncp = rtime = 0
        if pos > length:
            continue
        print '#' * 30
        print "size of dataset %d" % pos
        for j in range(num_test):
            temp = random.sample(data, pos)
            result, eval_result = rt_anon(att_tree, temp, type_alg, k, m, threshold)
            # save_to_file((att_tree, temp, result, k, m), number=j)
            rncp += eval_result[0]
            tncp += eval_result[1]
            rtime += eval_result[2]
            data = copy.deepcopy(data_back)
        rncp /= num_test
        tncp /= num_test
        rtime /= num_test
        print "RNCP %0.2f" % rncp + "%"
        all_rncp.append(round(rncp, 2))
        print "TNCP %0.2f" % tncp + "%"
        all_tncp.append(round(tncp, 2))
        print "Running time %0.2f" % rtime + " seconds"
        all_rtime.append(round(rtime, 2))
    print "Size of datasets", datasets
    print "RNCP", all_rncp
    print "TNCP", all_tncp
    print "Running time", all_rtime
 def test_RMR_with_merge_case(self):
     """
     Run 'RMR' (k=2, m=2) with threshold 0.5 on eight fixed records and
     check the first two evaluation values (printed as RNCP and TNCP by
     the get_result_* helpers in this file) to within 0.001.
     """
     init_tree()
     trees = [ATT_TREE, ATT_TREE]
     records = [['a1', ['a1', 'b1', 'b2']],
                ['a1', ['a2', 'b1']],
                ['a2', ['a2', 'b1', 'b2']],
                ['a2', ['a1', 'a2', 'b2']],
                ['b1', ['a2', 'b1', 'b2']],
                ['b1', ['a1', 'a2', 'b2']],
                ['b2', ['a2', 'b1', 'b2']],
                ['b2', ['a1', 'a2', 'b2']]]
     _, metrics = rt_anon(trees, records, 'RMR', 2, 2, 0.5)
     # Expected values, in the order rt_anon reports them.
     expected = (50, 0.5 * 5 / 23 * 100)
     for position, want in enumerate(expected):
         self.assertTrue(abs(metrics[position] - want) <= 0.001)