コード例 #1
0
def PrivCheck(df_train, df_test, df_test_rec_items, grid_area_dict,
              area_grid_dict, grid_list, cluster_num, grid_area_number, deltaX,
              pp):
    """Run the PrivCheck baseline on the test set and train check-in models.

    Steps, in order:

    1. Assign grid groups to ``df_train`` and fit the random-forest and
       XGBoost check-in models on it.
    2. Write the ``cal_pgy_withoutGridGroup`` result for ``df_test`` to
       ``tmp/pgy_check_in_privcheck.csv``.
    3. Build one ``cluster_num x cluster_num`` JSD matrix per grid group of
       ``df_test`` and save the stack to ``tmp/JSDM_girdGroup_privcheck.mat``.
    4. Call the MATLAB ``PrivCheck`` function with ``deltaX`` to obtain the
       obfuscation matrix ``xpgg``.
    5. Reset every test row's ``grid_group`` to 0, draw 50 obfuscated copies
       of the test data plus one original copy via ``funcs.get_obf_X``, and
       overwrite their area slots from ``grid_area_dict``.

    Args:
        df_train: Training frame passed to ``funcs.update_grid_group`` and
            the two model trainers.
        df_test: Test frame; must expose a ``grid_group`` column after
            ``funcs.update_grid_group``.
        df_test_rec_items: Frame passed through ``funcs.update_grid_group``;
            the result is not returned (see note in the body).
        grid_area_dict: Mapping indexed by a user's grid value, yielding the
            value written into the area slots of the records.
        area_grid_dict: Not used by this function; retained so existing
            callers keep working.
        grid_list: Forwarded to ``funcs.cal_pgy_withoutGridGroup``.
        cluster_num: Size of the first two axes of the JSD matrix stack.
        grid_area_number: Number of grid groups (third axis of the stack).
        deltaX: Argument forwarded to the MATLAB ``PrivCheck`` function.
        pp: Forwarded to ``funcs.get_obf_X``.

    Returns:
        Tuple ``(X_obf_dict, X_ori, model_rf, model_xgb)`` where
        ``X_obf_dict`` maps run index (0..49) to an obfuscated record mapping
        and ``X_ori`` is the original record mapping.
    """
    n_obf_runs = 50  # number of independent obfuscation draws

    df_train = funcs.update_grid_group(df_train, grid_area_dict)
    model_rf = funcs.train_rf_model_check_in(df_train)
    # xgboost
    model_xgb = funcs.train_xgb_model_check_in(df_train)

    # Computed on df_test *before* grid groups are assigned to it.
    # Presumably the MATLAB script reads this CSV -- TODO confirm.
    pgy = funcs.cal_pgy_withoutGridGroup(df_test, cluster_num, grid_list)
    pd.DataFrame(pgy).to_csv('tmp/pgy_check_in_privcheck.csv',
                             index=False,
                             header=None)

    df_test = funcs.update_grid_group(df_test, grid_area_dict)

    # One JSD matrix per grid group, stacked along the last axis.
    JSD_Mat_dict = np.zeros((cluster_num, cluster_num, grid_area_number))
    for gg in range(grid_area_number):
        df_test_gg = df_test.loc[df_test['grid_group'] == gg]
        # NOTE(review): the meaning of the literal 4 is defined by
        # funcs.cal_JSD_Matrix_withoutGridGroup -- confirm.
        JSD_Mat_dict[:, :, gg] = funcs.cal_JSD_Matrix_withoutGridGroup(
            df_test_gg, cluster_num, 4)

    scipy.io.savemat('tmp/JSDM_girdGroup_privcheck.mat',
                     {"JSD_Mat_input_Yang_trueTrain": JSD_Mat_dict})

    eng = matlab.engine.start_matlab()
    try:
        # NOTE(review): `edit` looks unnecessary for running the function;
        # kept to preserve the original call sequence -- confirm.
        eng.edit("../../matlab/checkin_tradeoff_scenario_I/PrivCheck",
                 nargout=0)
        eng.cd('../../matlab/checkin_tradeoff_scenario_I', nargout=0)
        # Unpack the engine's 2-tuple directly. Wrapping the (matrix, scalar)
        # pair in np.array first builds a ragged array, which raises
        # ValueError on NumPy >= 1.24.
        xpgg, _distortion_budget = eng.PrivCheck(deltaX, nargout=2)
    finally:
        # Always shut the MATLAB session down, even if the call failed.
        eng.quit()
    xpgg = np.array(xpgg)

    # Collapse every test row into grid group 0 before obfuscating.
    df_test['grid_group'] = pd.Series(np.zeros(df_test.shape[0]),
                                      index=df_test.index,
                                      dtype='int32')

    X_obf_dict = {}
    for i in range(n_obf_runs):
        X_obf_dict[i], _ = funcs.get_obf_X(df_test, xpgg, pp)

    # A separate call is made only to obtain the original records.
    _, X_ori = funcs.get_obf_X(df_test, xpgg, pp)

    # NOTE(review): the original record is updated at slot -3 while the
    # obfuscated records are updated at slot -1; the record layouts come from
    # funcs.get_obf_X -- confirm the differing positions are intended.
    for i in X_ori.keys():
        user_grid = X_ori[i][-2]
        area = grid_area_dict[user_grid]
        X_ori[i][-3] = area
        for j in range(n_obf_runs):
            X_obf_dict[j][i][-1] = area

    # NOTE(review): the result is bound to a local name only and never
    # returned; it matters to the caller only if update_grid_group mutates
    # its argument in place -- confirm.
    df_test_rec_items = funcs.update_grid_group(df_test_rec_items,
                                                grid_area_dict)

    return X_obf_dict, X_ori, model_rf, model_xgb
コード例 #2
0
ファイル: obfuscations.py プロジェクト: scottshufe/HyObscure
def PrivCheck(df_train, cluster_num, grid_list, grid_area_dict,
              grid_area_number, area_grid_dict, deltaX, pp):
    """Run the PrivCheck baseline on ``df_train`` (age-group variant).

    Steps, in order:

    1. Write the ``cal_pgy_withoutGridGroup`` result for ``df_train`` to
       ``tmp/pgy_check_in_privcheck.csv``.
    2. Assign grid groups, build one ``cluster_num x cluster_num`` JSD matrix
       per grid group, and save the stack to
       ``tmp/JSDM_girdGroup_privcheck.mat``.
    3. Call the MATLAB ``PrivCheck`` function with ``deltaX`` to obtain the
       obfuscation matrix ``xpgg``.
    4. Draw 25 obfuscated copies of the data plus one original copy via
       ``funcs.get_obf_X_withAgeGroup`` and overwrite their area slots from
       ``grid_area_dict``.

    Args:
        df_train: Input frame; must expose a ``grid_group`` column after
            ``funcs.update_grid_group``.
        cluster_num: Size of the first two axes of the JSD matrix stack.
        grid_list: Forwarded to ``funcs.cal_pgy_withoutGridGroup``.
        grid_area_dict: Mapping indexed by a user's grid value, yielding the
            value written into the area slots of the records.
        grid_area_number: Number of grid groups (third axis of the stack).
        area_grid_dict: Not used by this function; retained so existing
            callers keep working.
        deltaX: Argument forwarded to the MATLAB ``PrivCheck`` function.
        pp: Forwarded to ``funcs.get_obf_X_withAgeGroup``.

    Returns:
        Tuple ``(X_obf_dict, X_ori)`` where ``X_obf_dict`` maps run index
        (0..24) to an obfuscated record mapping and ``X_ori`` is the original
        record mapping.
    """
    n_obf_runs = 25  # number of independent obfuscation draws

    # Computed *before* grid groups are assigned.
    # Presumably the MATLAB script reads this CSV -- TODO confirm.
    pgy = funcs.cal_pgy_withoutGridGroup(df_train, cluster_num, grid_list)
    pd.DataFrame(pgy).to_csv('tmp/pgy_check_in_privcheck.csv',
                             index=False,
                             header=None)

    df_train = funcs.update_grid_group(df_train, grid_area_dict)

    # One JSD matrix per grid group, stacked along the last axis.
    JSD_Mat_dict = np.zeros((cluster_num, cluster_num, grid_area_number))
    for gg in range(grid_area_number):
        df_train_gg = df_train.loc[df_train['grid_group'] == gg]
        # NOTE(review): the meaning of the literal 4 is defined by
        # funcs.cal_JSD_Matrix_withoutGridGroup -- confirm.
        JSD_Mat_dict[:, :, gg] = funcs.cal_JSD_Matrix_withoutGridGroup(
            df_train_gg, cluster_num, 4)

    scipy.io.savemat('tmp/JSDM_girdGroup_privcheck.mat',
                     {"JSD_Mat_input_Yang_allObf": JSD_Mat_dict})

    eng = matlab.engine.start_matlab()
    try:
        # NOTE(review): `edit` looks unnecessary for running the function;
        # kept to preserve the original call sequence -- confirm.
        eng.edit("../../matlab/checkin_clusternum_scenario_II/PrivCheck",
                 nargout=0)
        eng.cd('../../matlab/checkin_clusternum_scenario_II', nargout=0)
        # Unpack the engine's 2-tuple directly. Wrapping the (matrix, scalar)
        # pair in np.array first builds a ragged array, which raises
        # ValueError on NumPy >= 1.24.
        xpgg, _distortion_budget = eng.PrivCheck(deltaX, nargout=2)
    finally:
        # Always shut the MATLAB session down, even if the call failed.
        eng.quit()
    xpgg = np.array(xpgg)

    # NOTE(review): grid groups were already assigned above; this second call
    # is kept because update_grid_group's idempotence is not visible here.
    df_train = funcs.update_grid_group(df_train, grid_area_dict)

    X_obf_dict = {}
    for i in range(n_obf_runs):
        X_obf_dict[i], _ = funcs.get_obf_X_withAgeGroup(df_train, xpgg, pp)

    # A separate call is made only to obtain the original records.
    _, X_ori = funcs.get_obf_X_withAgeGroup(df_train, xpgg, pp)

    # NOTE(review): the original record is updated at slot -3 while the
    # obfuscated records are updated at slot -1; the record layouts come from
    # funcs.get_obf_X_withAgeGroup -- confirm the differing positions are
    # intended.
    for i in X_ori.keys():
        user_grid = X_ori[i][-2]
        area = grid_area_dict[user_grid]
        X_ori[i][-3] = area
        for j in range(n_obf_runs):
            X_obf_dict[j][i][-1] = area

    return X_obf_dict, X_ori