Example #1
from collections import OrderedDict

from sklearn.metrics import classification_report, mean_squared_error, mean_absolute_error


def get_error_measure(actual, predict):
    # type: (list, list) -> OrderedDict
    # toolkit and settings are project-local modules
    actual = toolkit.make_it_list(actual)
    predict = toolkit.make_it_list(predict)

    assert len(actual) == len(predict), "actual and predict lists must have the same length"

    err = OrderedDict()

    if settings.predict_mode == 'CLASSIFICATION_BIN':
        TP = sum([1 for a, p in zip(actual, predict) if a == 1 and p == 1])
        FP = sum([1 for a, p in zip(actual, predict) if a == 0 and p == 1])
        FN = sum([1 for a, p in zip(actual, predict) if a == 1 and p == 0])
        TN = sum([1 for a, p in zip(actual, predict) if a == 0 and p == 0])

        # float() guards against integer division truncating to 0 under Python 2
        pd = round(float(TP) / (TP + FN), 2)  # probability of detection (recall)
        pf = round(float(FP) / (FP + TN), 2)  # probability of false alarm
        # g-measure: harmonic mean of pd and (1 - pf)
        g_measure = round(2 * pd * (1 - pf) / (pd + (1 - pf)), 2)

        err['pd'] = pd
        err['pf'] = pf
        err['g_measure'] = g_measure

    if settings.predict_mode == 'CLASSIFICATION_MUL':
        target_names = list(sorted(set(actual)))
        # TODO: under construction -- the original had a no-op `assert True` here
        return classification_report(actual, predict, target_names=target_names)

    if settings.predict_mode == 'REGRESSION':
        err['RMSE'] = mean_squared_error(actual, predict) ** 0.5
        err['MAE'] = mean_absolute_error(actual, predict)

    return err
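
Here pd is the probability of detection (recall), pf the probability of false alarm, and g_measure their harmonic combination 2*pd*(1-pf)/(pd+(1-pf)). A minimal usage sketch in the binary mode, assuming settings.predict_mode is set accordingly and toolkit.make_it_list passes lists through unchanged:

settings.predict_mode = 'CLASSIFICATION_BIN'
actual  = [1, 0, 1, 1, 0, 1]
predict = [1, 0, 0, 1, 1, 1]
# TP=3, FP=1, FN=1, TN=1 -> pd=0.75, pf=0.5, g_measure=0.6
print(get_error_measure(actual, predict))
# OrderedDict([('pd', 0.75), ('pf', 0.5), ('g_measure', 0.6)])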
Example #2
import csv
import time

from sklearn import svm, tree
from sklearn.linear_model import LinearRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.naive_bayes import GaussianNB


def predict_models(models, we_report_folder):
    """
    Given a list of models, report the prediction performance of each learner.
    Learners: SVM, CART, Naive Bayes
    Requires: TrainSet, TestSet, privatized_set_folder
    :param models: names of the models (datasets) to evaluate
    :param we_report_folder: folders whose material is used for prediction
    :return: no explicit return; results are shown or written depending on settings
    """

    models = toolkit.make_it_list(models)
    we_report_folder = toolkit.make_it_list(we_report_folder)

    privatized_set_folder = we_report_folder[0]

    clfs = list()

    if settings.predict_mode == 'CLASSIFICATION_BIN':
        print("Predicting at the classification mode (binary)")
        svm_clf = svm.SVC()
        cart_clf = tree.DecisionTreeClassifier()
        nb_clf = GaussianNB()
        clfs = [('svm', svm_clf), ('cart', cart_clf), ('naive bayes', nb_clf)]

    if settings.predict_mode == 'CLASSIFICATION_MUL':
        print("Predicting at the classification mode (multiple)")
        svm_clf = OneVsRestClassifier(svm.SVC(kernel='linear'))
        cart_clf = tree.DecisionTreeClassifier()
        nb_clf = OneVsRestClassifier(GaussianNB())
        clfs = [('svm', svm_clf), ('cart', cart_clf), ('naive bayes', nb_clf)]

    if settings.predict_mode == 'REGRESSION':
        print("Predicting at the regression mode.")

        lg = LinearRegression()
        dt_clf = tree.DecisionTreeRegressor()
        # TODO: add a neural network?
        clfs = [('linear regression', lg), ('decision tree', dt_clf)]

    date = time.strftime('%m%d%y')

    # open the report inside a with-block so the file handle is closed properly
    with open('Reports/PREDICTION_report.csv', 'a') as report_file:
        report_at = csv.writer(report_file)

        for model in models:
            # get the prediction at the original dataset
            x, y = get_original_train(model)
            for clf_name, clf in clfs:
                err = predicting(x, y, model, clf)
                for k, v in err.items():
                    report_at.writerow([date, time.time(), model, 'NoHandle', clf_name, k, "%.4f" % v])

            # get the refined (privatized) dataset
            for re in we_report_folder:
                x, y = get_moprhed_train(re, model)
                for clf_name, clf in clfs:
                    err = predicting(x, y, model, clf)
                    for k, v in err.items():
                        report_at.writerow([date, time.time(), model, re, clf_name, k, "%.4f" % v])
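
A minimal invocation sketch, assuming settings.predict_mode has been set and that get_original_train, get_moprhed_train and predicting are the project helpers referenced above; the dataset name 'ant' and the folder name 'Privatized' are hypothetical placeholders:

settings.predict_mode = 'CLASSIFICATION_BIN'
# toolkit.make_it_list wraps bare strings into one-element lists,
# so both scalars and lists are accepted here
predict_models('ant', 'Privatized')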
Example #3
import csv
import time


def ipr_report(model, org_folder, we_report_folders):
    # toolkit, settings and IPR are project-local modules
    we_report_folders = toolkit.make_it_list(we_report_folders)
    project_path = settings.project_path
    sensitive_attributes = settings.ipr_sensitive_attrs

    for ptz_folder in we_report_folders:
        ipr = IPR(project_path + org_folder + '/' + model + '.csv',
                  project_path + ptz_folder + '/' + model + '.csv')

        ipr.set_sensitive_attributes(sensitive_attributes)
        result = ipr.get_ipr(settings.ipr_query_size, settings.ipr_num_of_queries)

        with open(project_path + 'Reports/IPR_report.csv', 'a+') as f:
            w = csv.writer(f, delimiter=',', lineterminator='\n')
            w.writerow([time.strftime("%m%d%y"),
                        time.time(),
                        model,
                        sensitive_attributes,
                        org_folder,
                        ptz_folder,
                        result
                        ])
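
A minimal invocation sketch; the dataset and folder names are hypothetical placeholders, and settings.ipr_sensitive_attrs, settings.ipr_query_size and settings.ipr_num_of_queries are assumed to be configured already:

# one row per privatized folder is appended to Reports/IPR_report.csv
ipr_report('ant', 'Original', ['Privatized'])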
Example #4
import csv
import time


def apriori_report(model, org_folder, we_report_folder):
    """
    Mine association rules (Apriori) from the original and the privatized
    datasets, then log how many rules survive the privatization.
    :param model: name of the dataset to compare
    :param org_folder: folder holding the original dataset
    :param we_report_folder: folder(s) holding the privatized datasets
    :return: no explicit return; results are appended to Reports/APRIORI_report.csv
    """
    we_report_folder = toolkit.make_it_list(we_report_folder)

    for ptz_folder in we_report_folder:
        items_org, items_ptz, rules_org, rules_ptz, dis_org_data, dis_ptz_data = \
            apriori_cmpr(model, org_folder, ptz_folder)

        # identify a rule by hashing the heads of its antecedent and consequent
        ruleId = lambda r: hash(r[0][0][0] + '0' + r[0][1][0])

        rules_org_id = set(map(ruleId, rules_org))
        rules_ptz_id = set(map(ruleId, rules_ptz))

        lap = rules_org_id & rules_ptz_id  # rules present in both sets

        if len(lap) == 0:
            pr = rr = 0  # the preserve and reveal rates
        else:
            # float division (integer division would truncate under Python 2)
            pr = float(len(lap)) / len(rules_org)
            rr = float(len(lap)) / len(rules_ptz)

        with open(settings.project_path+'/Reports/APRIORI_report.csv', 'a') as f:
            writer = csv.writer(f)
            writer.writerow([time.strftime("%m%d%y"), time.time(), model, ptz_folder,
                             len(items_org),
                             len(items_ptz),
                             len(rules_org),
                             len(rules_ptz),
                             round(pr, 3),
                             round(rr, 3)])
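
A small worked illustration of the preserve rate (pr) and reveal rate (rr); the integer rule IDs below stand in for the hashes that ruleId would produce:

rules_org_id = {1, 2, 3, 4, 5}   # 5 rules mined from the original data
rules_ptz_id = {3, 4, 5, 6}      # 4 rules mined from the privatized data
lap = rules_org_id & rules_ptz_id           # {3, 4, 5}
pr = float(len(lap)) / len(rules_org_id)    # 3/5 = 0.6 of the original rules are preserved
rr = float(len(lap)) / len(rules_ptz_id)    # 3/4 = 0.75 of the privatized rules reveal originals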