def get_error_measure(actual, predict):
    # type: (list, list) -> OrderedDict
    """Compute error measures for a prediction, keyed by measure name.

    The set of measures depends on ``settings.predict_mode``:
      * CLASSIFICATION_BIN -> 'pd' (recall), 'pf' (false-alarm rate), 'g_measure'
      * CLASSIFICATION_MUL -> returns sklearn's classification_report (under construction)
      * REGRESSION         -> 'RMSE', 'MAE'

    :param actual: ground-truth labels/values (coerced to a list).
    :param predict: predicted labels/values (coerced to a list, same length).
    :return: OrderedDict of measure name -> rounded value.
    """
    actual = toolkit.make_it_list(actual)
    predict = toolkit.make_it_list(predict)
    assert len(actual) == len(predict), "length of the actual list and predict list must be the same"

    err = OrderedDict()

    # NOTE: string comparison must be '==', not 'is' — identity of string
    # literals is an interning accident, not a language guarantee.
    if settings.predict_mode == 'CLASSIFICATION_BIN':
        # Confusion-matrix cells for the positive class (label 1).
        TP = sum(1 for a, p in zip(actual, predict) if a == 1 and p == 1)
        FP = sum(1 for a, p in zip(actual, predict) if a == 0 and p == 1)
        FN = sum(1 for a, p in zip(actual, predict) if a == 1 and p == 0)
        TN = sum(1 for a, p in zip(actual, predict) if a == 0 and p == 0)

        # float() guarantees true division (the type comment suggests this file
        # once targeted Python 2); guard the denominators so a degenerate test
        # set (no positives / no negatives) yields 0 instead of crashing.
        pd = round(float(TP) / (TP + FN), 2) if (TP + FN) else 0
        pf = round(float(FP) / (FP + TN), 2) if (FP + TN) else 0

        # Harmonic mean of pd and (1 - pf); 0 when both terms vanish.
        g_denom = pd + (1 - pf)
        g_measure = round(2 * pd * (1 - pf) / g_denom, 2) if g_denom else 0

        err['pd'] = pd
        err['pf'] = pf
        err['g_measure'] = g_measure

    if settings.predict_mode == 'CLASSIFICATION_MUL':
        # TODO: under construction — please check here before relying on this
        # branch. (The original `assert True, ...` could never fire.)
        target_names = sorted(set(actual))
        # Pass by keyword: the third positional parameter of
        # classification_report is `labels`, not `target_names`.
        return classification_report(actual, predict, target_names=target_names)

    if settings.predict_mode == 'REGRESSION':
        err['RMSE'] = mean_squared_error(actual, predict) ** 0.5
        err['MAE'] = mean_absolute_error(actual, predict)

    return err
def predict_models(models, we_report_folder):
    """
    Func: given the models list, return the prediction precision.
    Learners: SVM, CART, Naive Bayes
    Require: TrainSet, TestSet, privatized_set_folder
    :param models: model names to evaluate (coerced to a list).
    :param we_report_folder: from which folders material to predict
    :return: no explicit returns. depending on whether to show or write the results...
    """
    models = toolkit.make_it_list(models)
    we_report_folder = toolkit.make_it_list(we_report_folder)

    clfs = []
    # NOTE: string comparison must be '==', not 'is' (interning accident).
    if settings.predict_mode == 'CLASSIFICATION_BIN':
        print("Predicting at the classification mode (binary)")
        # Each entry is a (name, estimator) pair — the original list was
        # missing the parentheses around the naive-bayes tuple, which broke
        # the `for clf_name, clf in clfs` unpacking below.
        clfs = [('svm', svm.SVC()),
                ('cart', tree.DecisionTreeClassifier()),
                ('naive bayes', GaussianNB())]

    if settings.predict_mode == 'CLASSIFICATION_MUL':
        print("Predicting at the classification mode (multiple)")
        clfs = [('svm', OneVsRestClassifier(svm.SVC(kernel='linear'))),
                ('cart', tree.DecisionTreeClassifier()),
                ('naive bayes', OneVsRestClassifier(GaussianNB()))]

    if settings.predict_mode == 'REGRESSION':
        print("Predicting at the regression mode.")
        # TODO the neual network??!
        clfs = [('linear regression', LinearRegression()),
                ('decision tree', tree.DecisionTreeRegressor())]

    # `with` guarantees the report file is flushed and closed even on error
    # (the original writer's file handle was never closed).
    with open('Reports/PREDICTION_report.csv', 'a') as report_file:
        report_at = csv.writer(report_file)
        date = time.strftime('%m%d%y')
        for model in models:
            # get the prediction at original dataset
            x, y = get_original_train(model)
            for clf_name, clf in clfs:
                err = predicting(x, y, model, clf)
                for k, v in err.items():
                    report_at.writerow([date, time.time(), model, 'NoHandle', clf_name, k, "%.4f" % v])

            # get the refined dataset
            # (loop variable renamed from `re` so it no longer shadows the re module)
            for folder in we_report_folder:
                x, y = get_moprhed_train(folder, model)
                for clf_name, clf in clfs:
                    err = predicting(x, y, model, clf)
                    for k, v in err.items():
                        report_at.writerow([date, time.time(), model, folder, clf_name, k, "%.4f" % v])
def ipr_report(model, org_folder, we_report_folders):
    """Compute the IPR (privacy) measure of each privatized dataset against
    the original, and append one row per folder to Reports/IPR_report.csv.

    :param model: dataset/model name; '<folder>/<model>.csv' is compared.
    :param org_folder: folder holding the original (non-privatized) data.
    :param we_report_folders: one folder name or a list of them, each holding
        a privatized variant of the dataset.
    :return: None — results are appended to the report file.
    """
    we_report_folders = toolkit.make_it_list(we_report_folders)
    project_path = settings.project_path
    sensitive_attributes = settings.ipr_sensitive_attrs

    for ptz_folder in we_report_folders:
        ipr = IPR(project_path + org_folder + '/' + model + '.csv',
                  project_path + ptz_folder + '/' + model + '.csv')
        ipr.set_sensitive_attributes(sensitive_attributes)
        result = ipr.get_ipr(settings.ipr_query_size, settings.ipr_num_of_queries)

        # Append one result row; the redundant per-iteration `import time`
        # from the original is dropped — `time` is already in module scope
        # (the sibling report functions use it without a local import).
        with open(project_path + 'Reports/IPR_report.csv', 'a+') as f:
            w = csv.writer(f, delimiter=',', lineterminator='\n')
            w.writerow([time.strftime("%m%d%y"),
                        time.time(),
                        model,
                        sensitive_attributes,
                        org_folder,
                        ptz_folder,
                        result])
def apriori_report(model, org_folder, we_report_folder):
    """Compare association rules mined from the original vs. each privatized
    dataset, and append preserve/reveal rates to Reports/APRIORI_report.csv.

    :param model: dataset/model name to mine.
    :param org_folder: folder holding the original data.
    :param we_report_folder: one privatized folder name or a list of them.
    :return: None — results are appended to the report file.
    """
    we_report_folder = toolkit.make_it_list(we_report_folder)

    def rule_fingerprint(rule):
        # Hash of "<first antecedent item>0<first consequent item>" — the
        # same identity the original lambda computed.
        return hash(rule[0][0][0] + '0' + rule[0][1][0])

    for ptz_folder in we_report_folder:
        (items_org, items_ptz, rules_org, rules_ptz,
         dis_org_data, dis_ptz_data) = apriori_cmpr(model, org_folder, ptz_folder)

        org_ids = {rule_fingerprint(r) for r in rules_org}
        ptz_ids = {rule_fingerprint(r) for r in rules_ptz}
        shared = org_ids & ptz_ids

        # Preserve rate: fraction of original rules still found after
        # privatization. Reveal rate: fraction of privatized rules that
        # already existed. A nonempty overlap implies both rule lists are
        # nonempty, so the divisions below are safe.
        if shared:
            pr = len(shared) / len(rules_org)
            rr = len(shared) / len(rules_ptz)
        else:
            pr = rr = 0

        with open(settings.project_path + '/Reports/APRIORI_report.csv', 'a') as f:
            csv.writer(f).writerow([time.strftime("%m%d%y"),
                                    time.time(),
                                    model,
                                    ptz_folder,
                                    len(items_org),
                                    len(items_ptz),
                                    len(rules_org),
                                    len(rules_ptz),
                                    round(pr, 3),
                                    round(rr, 3)])