def get_result_missing(att_trees, data, k=DEFAULT_K, n=10):
    """
    Change the number of missing values while fixing k, QI and dataset size.

    For each target percentage in [5, 10, 25, 50, 75], extra missing values
    are injected with gen_missing_dataset, semi_partition is run n times,
    and the averaged NCP, running time and missing pollution are printed.

    att_trees: passed through unchanged to semi_partition.
    data: list of records in which '*' marks a missing value. It is only
        read here: every trial runs on its own deep copy.
    k: value handed to semi_partition.
    n: number of trials averaged per percentage (n == 0 raises
        ZeroDivisionError when averaging).

    Returns None; all results are printed.
    """
    length = len(data)
    # The last column of a record is left out of the cell count.
    qi_len = len(data[0]) - 1
    print("K=%d" % k)
    # Missing values already present in the input ('*' in any column).
    raw_missing = sum(
        1 for record in data for value in record if value == '*')
    # each evaluation targets a total percentage of missing cells; only the
    # difference to what is already missing has to be added
    check_percentage = [5, 10, 25, 50, 75]
    datasets = [int(0.01 * p * length * qi_len) - raw_missing
                for p in check_percentage]
    all_ncp = []
    all_rtime = []
    all_pollution = []
    for percentage, joint in zip(check_percentage, datasets):
        ncp = rtime = pollution = 0.0
        for _ in range(n):
            # gen_missing_dataset's return value is not used, so it is
            # assumed to modify the list it is given in place. Copy first,
            # so the caller's records are never polluted by a trial.
            trial = copy.deepcopy(data)
            gen_missing_dataset(trial, joint)
            if __DEBUG:
                missing_rate(trial)
            _, eval_result = semi_partition(att_trees, trial, k)
            ncp += eval_result[0]
            rtime += eval_result[1]
            pollution += eval_result[2]
        ncp /= n
        rtime /= n
        pollution /= n
        if __DEBUG:
            print("check_percentage %s" % percentage)
            print("Add missing %d" % joint)
            print("Average NCP %0.2f" % ncp + "%")
            print("Running time %0.2f" % rtime + "seconds")
            print("Missing Pollution = %.2f" % pollution + "%")
            print('#' * 30)
        all_ncp.append(round(ncp, 2))
        all_rtime.append(round(rtime, 2))
        all_pollution.append(round(pollution, 2))
    print("All NCP %s" % (all_ncp,))
    print("All Running time %s" % (all_rtime,))
    print("Missing Pollution %s" % (all_pollution,))
    print('#' * 30)
def get_result_one(att_trees, data, k=DEFAULT_K):
    """
    Run both anonymizers once with the given k and print their metrics.

    mondrian_delete_missing runs on the data as passed in; mondrian then
    runs on a deep copy of a snapshot taken before the first run. For each
    run the second element of the returned pair is printed as NCP [0],
    running time [1] and missing pollution [2].

    att_trees: passed through unchanged to both algorithms.
    data: dataset handed to missing_rate and the first algorithm.
    k: value handed to both algorithms (defaults to DEFAULT_K).

    Returns None; all results are printed.
    """
    def show(title, metrics):
        # One report section: heading followed by the three metrics.
        print(title)
        print("NCP %0.2f" % metrics[0] + "%")
        print("Running time %0.2f" % metrics[1] + "seconds")
        print("Missing Pollution = %.2f %%" % metrics[2])

    print("K=%d" % k)
    # Snapshot taken before anything else touches the data.
    snapshot = copy.deepcopy(data)
    missing_rate(data)
    _, baseline = mondrian_delete_missing(att_trees, data, k)
    show("Mondrian", baseline)
    # The second algorithm gets its own copy of the untouched snapshot.
    _, enhanced = mondrian(att_trees, copy.deepcopy(snapshot), k)
    show("Enhanced Mondrian", enhanced)
def get_result_missing(att_trees, data, k=DEFAULT_K, n=DEFAULT_K):
    """
    Change the number of missing values while fixing k, QI and dataset size.

    For each target percentage in [5, 10, 25, 50, 75], extra missing values
    are injected with gen_missing_dataset and two algorithms are run n
    times each: mondrian (reported as "Enhanced Mondrian") and
    mondrian_delete_missing (reported as "Mondrian"). Averaged NCP and
    running time are printed for both; missing pollution is printed in the
    final summary for mondrian only.

    att_trees: passed through unchanged to both algorithms.
    data: list of records in which '*' marks a missing value. It is only
        read here: every trial runs on its own deep copy.
    k: value handed to both algorithms.
    n: number of trials averaged per percentage and algorithm (n == 0
        raises ZeroDivisionError when averaging).
        NOTE(review): the default is DEFAULT_K, which ties the trial count
        to the default k -- confirm this is intended.

    Returns None; all results are printed.
    """
    length = len(data)
    # The last column of a record is left out of the cell count.
    qi_len = len(data[0]) - 1
    print("K=%d" % k)
    # Missing values already present in the input ('*' in any column).
    raw_missing = sum(
        1 for record in data for value in record if value == '*')
    # each evaluation targets a total percentage of missing cells; only the
    # difference to what is already missing has to be added
    check_percentage = [5, 10, 25, 50, 75]
    datasets = [int(0.01 * p * length * qi_len) - raw_missing
                for p in check_percentage]

    def average_runs(algorithm, joint):
        # Average (ncp, rtime, pollution) of `algorithm` over n trials.
        totals = [0.0, 0.0, 0.0]
        for _ in range(n):
            # gen_missing_dataset's return value is not used, so it is
            # assumed to modify the list it is given in place. Copy first,
            # so the caller's records are never polluted by a trial.
            trial = copy.deepcopy(data)
            gen_missing_dataset(trial, joint)
            if __DEBUG:
                missing_rate(trial)
            _, eval_result = algorithm(att_trees, trial, k)
            for idx in range(3):
                totals[idx] += eval_result[idx]
        return [total / n for total in totals]

    all_ncp = []
    all_rtime = []
    all_pollution = []
    deletion_all_ncp = []
    deletion_all_rtime = []
    for percentage, joint in zip(check_percentage, datasets):
        ncp, rtime, pollution = average_runs(mondrian, joint)
        if __DEBUG:
            print("check_percentage %s" % percentage)
            print("Add missing %d" % joint)
            print("Average NCP %0.2f" % ncp + "%")
            print("Running time %0.2f" % rtime + "seconds")
            print("Missing Pollution = %.2f" % pollution + "%")
            print('#' * 30)
        all_ncp.append(round(ncp, 2))
        all_rtime.append(round(rtime, 2))
        all_pollution.append(round(pollution, 2))
        # Same workload for the deletion baseline. Its pollution is now
        # averaged as well, so the debug line below no longer shows a
        # constant 0.00.
        ncp, rtime, pollution = average_runs(mondrian_delete_missing, joint)
        if __DEBUG:
            print("Add missing %d" % joint)
            print("Average NCP %0.2f" % ncp + "%")
            print("Running time %0.2f" % rtime + "seconds")
            print("Missing Pollution = %.2f" % pollution + "%")
            print('#' * 30)
        deletion_all_ncp.append(round(ncp, 2))
        deletion_all_rtime.append(round(rtime, 2))
    print("Mondrian")
    print("All NCP %s" % (deletion_all_ncp,))
    print("All Running time %s" % (deletion_all_rtime,))
    print("Enhanced Mondrian")
    print("All NCP %s" % (all_ncp,))
    print("All Running time %s" % (all_rtime,))
    print("Missing Pollution %s" % (all_pollution,))
    print('#' * 30)