Python semi_partition Exemples, semi_partition.semi_partition Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : anonymizer.py Projet : qiyuangong/SemiPartition_for_Incomplete_Microdata

def get_result_k(att_trees, data):
    """
    change k, whle fixing QD and size of dataset
    """
    data_back = copy.deepcopy(data)
    all_ncp = []
    all_rtime = []
    all_pollution = []
    # for k in range(5, 105, 5):
    for k in [2, 5, 10, 25, 50, 100]:
        _, eval_result = semi_partition(att_trees, data, k)
        data = copy.deepcopy(data_back)
        all_ncp.append(round(eval_result[0], 2))
        all_rtime.append(round(eval_result[1], 2))
        all_pollution.append(round(eval_result[2], 2))
        if __DEBUG:
            print '#' * 30
            print "K=%d" % k
            print "NCP %0.2f" % eval_result[0] + "%"
            print "Running time %0.2f" % eval_result[1] + "seconds"
            print "Missing Pollution = %.2f %%" % eval_result[2]
    print "All NCP", all_ncp
    print "All Running time", all_rtime
    print "Missing Pollution", all_pollution
    print '#' * 30

Exemple #2

0

Afficher le fichier

Fichier : anonymizer.py Projet : qiyuangong/SemiPartition_for_Incomplete_Microdata

def get_result_qi(att_trees, data, k=DEFAULT_K):
    """
    change nubmber of QI, whle fixing k and size of dataset
    """
    data_back = copy.deepcopy(data)
    ls = len(data[0])
    all_ncp = []
    all_rtime = []
    all_pollution = []
    for i in range(1, ls):
        _, eval_result = semi_partition(att_trees, data, k, i)
        data = copy.deepcopy(data_back)
        all_ncp.append(round(eval_result[0], 2))
        all_rtime.append(round(eval_result[1], 2))
        all_pollution.append(round(eval_result[2], 2))
        if __DEBUG:
            print '#' * 30
            print "Number of QI=%d" % i
            print "NCP %0.2f" % eval_result[0] + "%"
            print "Running time %0.2f" % eval_result[1] + "seconds"
            print "Missing Pollution = %.2f %%" % eval_result[2]
    print "All NCP", all_ncp
    print "All Running time", all_rtime
    print "Missing Pollution", all_pollution
    print '#' * 30

Exemple #3

0

Afficher le fichier

Fichier : anonymizer.py Projet : qiyuangong/SemiPartition_for_Incomplete_Microdata

def get_result_one(att_trees, data, k=DEFAULT_K):
    "run semi_partition for one time, with k=10"
    print "K=%d" % k
    print "Mondrian"
    data_back = copy.deepcopy(data)
    _, eval_result = semi_partition(att_trees, data, k)
    print "NCP %0.2f" % eval_result[0] + "%"
    print "Running time %0.2f" % eval_result[1] + "seconds"
    print "Missing Pollution = %.2f %%" % eval_result[2]

Exemple #4

0

Afficher le fichier

Fichier : anonymizer.py Projet : qiyuangong/SemiPartition_for_Incomplete_Microdata

def get_result_missing(att_trees, data, k=DEFAULT_K, n=10):
    """
    change nubmber of missing, whle fixing k, qi and size of dataset
    """
    data_back = copy.deepcopy(data)
    length = len(data_back)
    qi_len = len(data[0]) - 1
    raw_missing = raw_missing_record = 0
    print "K=%d" % k
    for record in data:
        flag = False
        for value in record:
            if value == '*':
                raw_missing += 1
                flag = True
        if flag:
            raw_missing_record += 1
    # print "Missing Percentage %.2f" % (raw_missing * 100.0 / (length * qi_len)) + '%%'
    # each evaluation varies add 5% missing values
    check_percentage = [5, 10, 25, 50, 75]
    datasets = []
    for p in check_percentage:
        joint = int(0.01 * p * length * qi_len) - raw_missing
        datasets.append(joint)
    all_ncp = []
    all_rtime = []
    all_pollution = []
    for i, joint in enumerate(datasets):
        ncp = rtime = pollution = 0.0
        for j in range(n):
            gen_missing_dataset(data, joint)
            if __DEBUG:
                missing_rate(data)
            _, eval_result = semi_partition(att_trees, data, k)
            data = copy.deepcopy(data_back)
            ncp += eval_result[0]
            rtime += eval_result[1]
            pollution += eval_result[2]
        ncp /= n
        rtime /= n
        pollution /= n
        if __DEBUG:
            print "check_percentage", check_percentage[i]
            print "Add missing %d" % joint
            print "Average NCP %0.2f" % ncp + "%"
            print "Running time %0.2f" % rtime + "seconds"
            print "Missing Pollution = %.2f" % pollution + "%"
            print '#' * 30
        all_ncp.append(round(ncp, 2))
        all_rtime.append(round(rtime, 2))
        all_pollution.append(round(pollution, 2))
    print "All NCP", all_ncp
    print "All Running time", all_rtime
    print "Missing Pollution", all_pollution
    print '#' * 30

Exemple #5

0

Afficher le fichier

Fichier : test.py Projet : qiyuangong/SemiPartition_for_Incomplete_Microdata

 def test1_semi_partition_incompelte(self):
     """
     semi_partition with k=2 on eight records, four of which carry '?'
     in the second column (presumably the missing-value marker -- TODO
     confirm): the first evaluation entry must lie within 0.05 of
     200.0 / 144.
     """
     init()
     records = [
         ['6', '?', 'haha'],
         ['6', '?', 'test'],
         ['8', '2', 'haha'],
         ['8', '2', 'test'],
         ['4', '?', 'hha'],
         ['4', '?', 'hha'],
         ['4', '3', 'hha'],
         ['4', '4', 'hha'],
     ]
     _anonymized, metrics = semi_partition(ATT_TREE, records, 2)
     expected = 200.0 / 144
     deviation = abs(metrics[0] - expected)
     self.assertTrue(deviation < 0.05)

Exemple #6

0

Afficher le fichier

Fichier : test.py Projet : qiyuangong/SemiPartition_for_Incomplete_Microdata

 def test_semi_partition_balance(self):
     """
     semi_partition with k=2 on eight records without any '?' values:
     the first evaluation entry must lie within 0.05 of 100.0 / 16.
     """
     init()
     records = [
         ['6', '1', 'haha'],
         ['6', '1', 'test'],
         ['8', '2', 'haha'],
         ['8', '2', 'test'],
         ['4', '1', 'hha'],
         ['4', '1', 'hha'],
         ['1', '1', 'hha'],
         ['2', '1', 'hha'],
     ]
     _anonymized, metrics = semi_partition(ATT_TREE, records, 2)
     expected = 100.0 / 16
     deviation = abs(metrics[0] - expected)
     self.assertTrue(deviation < 0.05)

Exemple #7

0

Afficher le fichier

Fichier : anonymizer.py Projet : qiyuangong/SemiPartition_for_Incomplete_Microdata

def get_result_dataset(att_trees, data, k=DEFAULT_K, n=10):
    """
    fix k and QI, while changing size of dataset
    n is the proportion nubmber.
    """
    data_back = copy.deepcopy(data)
    length = len(data_back)
    print "K=%d" % k
    joint = 5000
    datasets = []
    check_time = length / joint
    if length % joint == 0:
        check_time -= 1
    for i in range(check_time):
        datasets.append(joint * (i + 1))
    datasets.append(length)
    all_ncp = []
    all_rtime = []
    all_pollution = []
    for pos in datasets:
        ncp = rtime = pollution = 0.0
        for j in range(n):
            temp = random.sample(data, pos)
            __, eval_result = semi_partition(att_trees, temp, k)
            ncp += eval_result[0]
            rtime += eval_result[1]
            pollution += eval_result[2]
            data = copy.deepcopy(data_back)
        ncp /= n
        rtime /= n
        pollution /= n
        if __DEBUG:
            print '#' * 30
            print "size of dataset %d" % pos
            print "Average NCP %0.2f" % ncp + "%"
            print "Running time %0.2f" % rtime + "seconds"
            print "Missing Pollution = %.2f %%" % pollution + "%"
        all_ncp.append(round(ncp, 2))
        all_rtime.append(round(rtime, 2))
        all_pollution.append(round(pollution, 2))
    print "All NCP", all_ncp
    print "All Running time", all_rtime
    print "Missing Pollution", all_pollution
    print '#' * 30