Beispiel #1
0
def mae2(data_raw, privacy_budget, best_f):
    """Evaluate noisy-count errors for the features chosen by ``select_ci2``.

    The feature subset comes from ``select_ci2``, which is given the
    module-level ``backup_solutions``, ``mcd`` and ``u2_dict`` together with
    ``best_f``. The subset is reported twice through ``print_cs`` (once with
    its default threshold, once with ``float(mcd)``) and then scored three
    times with ``algo_2_count.noise_count_error``.

    Args:
        data_raw: Table that is indexed with the selected feature names
            (presumably a pandas DataFrame -- confirm against the caller).
        privacy_budget: Forwarded unchanged to ``noise_count_error``.
        best_f: Forwarded unchanged to ``select_ci2``.

    Returns:
        A tuple ``(u2, mae_cs, mae_gs)``: the error using ``'CS_i'`` computed
        with ``mcd`` as threshold, the error using ``'CS_i'`` with the default
        threshold of ``funcs.cs``, and the error using ``'GS'`` with the
        default threshold.
    """
    chosen = select_ci2(1, backup_solutions, best_f, mcd, u2_dict)

    label = 'best+u2选出来的特征'
    print_cs(label, data_raw[chosen])
    print_cs(label, data_raw[chosen], float(mcd))

    def _count_error(sensitivity_key, *cs_args):
        # data_raw is sliced anew for each argument on purpose, so that
        # noise_count_error and funcs.cs never share one object.
        return algo_2_count.noise_count_error(
            data_raw[chosen],
            funcs.cs(data_raw[chosen], *cs_args)[sensitivity_key],
            privacy_budget,
        )

    # Our algorithm: mcd is used as the threshold.
    u2 = _count_error('CS_i', mcd)
    # CS with the default threshold of funcs.cs.
    mae_cs = _count_error('CS_i')
    # GS with the default threshold of funcs.cs.
    mae_gs = _count_error('GS')

    return u2, mae_cs, mae_gs
Beispiel #2
0
def MAE1(privacy_budget):
    """Print and return the noisy-count error for the ``selectCi1`` choice.

    The candidates are produced by ``partitionC(adjustment_features)`` and
    the selection is made by ``selectCi1`` together with ``best_features``;
    all of these are module-level names.

    Args:
        privacy_budget: Forwarded unchanged to
            ``algo_2_count.noise_count_error``.

    Returns:
        The value returned by ``noise_count_error`` when it is given the
        selected columns of ``data`` and the ``'CS_i'`` entry of
        ``funcs.cs`` evaluated with ``mcd``.
    """
    candidates = partitionC(adjustment_features)
    picked = selectCi1(1, candidates, best_features)

    error = algo_2_count.noise_count_error(
        data[picked],
        funcs.cs(data[picked], mcd)['CS_i'],
        privacy_budget,
    )
    print(error)
    return error
Beispiel #3
0
def MAE2(privacy_budget):
    """Print and return the noisy-count error for the ``selectCi2`` choice.

    The candidates are produced by ``partitionC(adjustment_features)`` and
    the selection is made by ``selectCi2`` together with ``best_features``;
    all of these are module-level names.

    Args:
        privacy_budget: Forwarded unchanged to
            ``algo_2_count.noise_count_error``.

    Returns:
        The value returned by ``noise_count_error`` when it is given the
        selected columns of ``data`` and the ``'CS_i'`` entry of
        ``funcs.cs`` evaluated with ``mcd``.
    """
    candidates = partitionC(adjustment_features)
    picked = selectCi2(1, candidates, best_features)

    error = algo_2_count.noise_count_error(
        data[picked],
        funcs.cs(data[picked], mcd)['CS_i'],
        privacy_budget,
    )
    print(error)
    return error
Beispiel #4
0
def mae1(privacy_budget):
    """Evaluate noisy-count errors for the features chosen by ``select_ci1``.

    The feature subset comes from ``select_ci1`` applied to the module-level
    ``backup_solutions`` and ``best_f``. It is scored three times with
    ``algo_2_count.noise_count_error`` on the matching columns of the
    module-level ``data_raw``, and each score is printed with a short tag.

    Args:
        privacy_budget: Forwarded unchanged to ``noise_count_error``.

    Returns:
        A tuple ``(u1, mae_cs, mae_gs)``: the error using ``'CS_i'`` computed
        with ``mcd`` as threshold, the error using ``'CS_i'`` with the default
        threshold of ``funcs.cs``, and the error using ``'GS'`` with the
        default threshold.
    """
    chosen = select_ci1(1, backup_solutions, best_f)

    def _count_error(sensitivity_key, *cs_args):
        # data_raw is sliced anew for each argument on purpose, so that
        # noise_count_error and funcs.cs never share one object.
        return algo_2_count.noise_count_error(
            data_raw[chosen],
            funcs.cs(data_raw[chosen], *cs_args)[sensitivity_key],
            privacy_budget,
        )

    # Our algorithm: mcd is used as the threshold.
    u1 = _count_error('CS_i', mcd)
    # CS with the default threshold of funcs.cs.
    mae_cs = _count_error('CS_i')
    # GS with the default threshold of funcs.cs.
    mae_gs = _count_error('GS')

    for tag, value in (('u1', u1), ('u1cs', mae_cs), ('u1gs', mae_gs)):
        print(tag + str(value))

    return u1, mae_cs, mae_gs
Beispiel #5
0
import numpy as np
import pandas as pd
import funcs

# Load the data: two separate CSV files (d1, d2) and a third one (d_new).
d1 = pd.read_csv('data/d1.csv')
d2 = pd.read_csv('data/d2.csv')
d = pd.read_csv('data/d_new.csv')

# Run funcs.cs on each of the first two tables and print the results.
cs_1 = funcs.cs(d1)
print('对D1运行:\n', cs_1)
print('------------------')
cs_2 = funcs.cs(d2)
print('对D2运行:\n', cs_2)
print('------------------')
# mcd is the mean of the two 'CS_mean' entries obtained above.
mcd = np.mean([cs_1['CS_mean'], cs_2['CS_mean']])
print('MCD为', mcd)
print('------------------')
# Run funcs.cs on d with its default threshold (0.5 according to the
# printed label) ...
cs_3 = funcs.cs(d)
print('对D运行,阈值为0.5:\n', cs_3)
print('------------------')
# ... and again with mcd as the threshold, for comparison.
cs_3_mcd = funcs.cs(d, threshold=mcd)
print('对D运行,阈值为mcd:\n', cs_3_mcd)
print('------------------')

# Clean the data.
# NOTE(review): data_clean presumably returns (features, target, column
# names) -- confirm against funcs.data_clean.
ad_x, ad_y, names = funcs.data_clean(d)
x = ad_x.values
y = list(ad_y)
# 特征选择
best_feature_set_names, adjusted_feature_set_names = funcs.randomized_lasso(
Beispiel #6
0
def MCD():
    """Return the mean of the ``'CS_mean'`` values of ``data1`` and ``data2``.

    ``funcs.cs`` is called once per data set with its default threshold and
    the ``'CS_mean'`` entries of the results are averaged with ``np.mean``.
    """
    cs_means = []
    for party_data in (data1, data2):
        cs_means.append(funcs.cs(party_data)['CS_mean'])

    return np.mean(cs_means)
Beispiel #7
0
def utilityFunction2(feature_names):
    """Return ``MCD()`` divided by the ``'CS_i'`` value of a feature subset.

    Args:
        feature_names: Key used to index the module-level ``data``
            (presumably a list of column names -- confirm against caller).

    Returns:
        The result of ``MCD()`` divided by the ``'CS_i'`` entry that
        ``funcs.cs`` yields for ``data[feature_names]`` with ``mcd`` as its
        second argument.
    """
    cs_result = funcs.cs(data[feature_names], mcd)
    denominator = cs_result['CS_i']
    numerator = MCD()
    return numerator / denominator
Beispiel #8
0
def print_cs(name, data, threshold=0.5):
    """Print a labelled ``funcs.cs`` report for ``data``.

    Three lines of output are produced: a header made of ``name`` followed by
    the threshold, the ``columns`` attribute of ``data``, and the result of
    ``funcs.cs(data, threshold)``.

    Args:
        name: Text placed at the start of the header line.
        data: Object with a ``columns`` attribute that ``funcs.cs`` accepts.
        threshold: Second argument handed to ``funcs.cs``; defaults to 0.5.
    """
    header_suffix = ':(阈值为 %s )' % threshold
    print(name + header_suffix)

    column_labels = data.columns
    print(column_labels)

    cs_report = funcs.cs(data, threshold)
    print(cs_report)
Beispiel #9
0
def u2_initialize(c):
    """Build a lookup from each candidate in ``c`` to its ``'CS_i'`` value.

    Args:
        c: Iterable of candidates, each itself an iterable that is used to
            index the module-level ``data_raw`` (presumably lists of column
            names -- confirm against caller).

    Returns:
        A dict whose keys are the candidates converted to tuples and whose
        values are the ``'CS_i'`` entries of ``funcs.cs`` evaluated on the
        matching slice of ``data_raw`` with ``mcd`` as second argument.
    """
    # Lists are unhashable, so every candidate is turned into a tuple before
    # it can serve as a dictionary key.
    keys = tuple(tuple(candidate) for candidate in c)

    # Evaluated lazily, one candidate at a time, while the pairs are consumed.
    scores = (funcs.cs(data_raw[candidate], mcd)['CS_i'] for candidate in c)

    lookup = {}
    for key, score in zip(keys, scores):
        lookup[key] = score
    return lookup
Beispiel #10
0
def get_mcd(subdatas):
    """Return the mean ``'CS_mean'`` over all data sets in ``subdatas``.

    Args:
        subdatas: Iterable of data sets, each accepted by ``funcs.cs``.

    Returns:
        ``np.mean`` of the ``'CS_mean'`` entries that ``funcs.cs`` (default
        threshold) yields for the individual data sets.
    """
    cs_means = []
    for part in subdatas:
        cs_means.append(funcs.cs(part)['CS_mean'])

    return np.mean(cs_means)
Beispiel #11
0
def get_mcd(data_raw):
    """Return the mean ``'CS_mean'`` over the parts of ``data_raw``.

    Args:
        data_raw: Data set handed to ``funcs.split(data_raw, 3)``
            (presumably split into three parts -- confirm against
            ``funcs.split``).

    Returns:
        ``np.mean`` of the ``'CS_mean'`` entries that ``funcs.cs`` (default
        threshold) yields for each part returned by ``funcs.split``.
    """
    parts = funcs.split(data_raw, 3)

    cs_means = []
    for part in parts:
        cs_means.append(funcs.cs(part)['CS_mean'])

    return np.mean(cs_means)