def PrivCheck(df_train, df_test, df_test_rec_items, grid_area_dict,
              area_grid_dict, grid_list, cluster_num, grid_area_number,
              deltaX, pp, n_obf=50):
    """Train the check-in models and build PrivCheck-obfuscated test data.

    Steps, in order:

    1. Add grid groups to ``df_train`` and fit the random-forest and XGBoost
       check-in models on it.
    2. Write the ``cal_pgy_withoutGridGroup`` result for ``df_test`` to
       ``tmp/pgy_check_in_privcheck.csv`` and the per-grid-group JSD matrices
       to ``tmp/JSDM_girdGroup_privcheck.mat``.
    3. Run the MATLAB ``PrivCheck`` routine with ``deltaX`` to obtain the
       obfuscation matrix ``xpgg``.
    4. Draw ``n_obf`` obfuscated copies of the test data plus the original
       feature dict, then overwrite their area entries from
       ``grid_area_dict``.

    NOTE(review): this source contains a second function that is also named
    ``PrivCheck``. If both live in the same module the later definition
    replaces this one -- confirm they belong to separate files.

    Args:
        df_train: Training check-in DataFrame.
        df_test: Test check-in DataFrame; its ``grid_group`` column is
            reset to 0 before obfuscation.
        df_test_rec_items: DataFrame passed through
            ``funcs.update_grid_group``; the result is not used here.
        grid_area_dict: Mapping from grid id to area (grid group) id.
        area_grid_dict: Unused; kept for interface compatibility.
        grid_list: Grid ids forwarded to ``funcs.cal_pgy_withoutGridGroup``.
        cluster_num: Number of clusters; sets the first two dimensions of
            the JSD matrix.
        grid_area_number: Number of grid groups; sets the third dimension
            of the JSD matrix.
        deltaX: Argument forwarded to the MATLAB ``PrivCheck`` function.
        pp: Argument forwarded to ``funcs.get_obf_X``.
        n_obf: Number of obfuscated copies to generate (default 50, the
            value that used to be hard-coded).

    Returns:
        Tuple ``(X_obf_dict, X_ori, model_rf, model_xgb)`` where
        ``X_obf_dict`` maps ``0..n_obf-1`` to obfuscated feature dicts and
        ``X_ori`` is the original feature dict.
    """
    df_train = funcs.update_grid_group(df_train, grid_area_dict)
    model_rf = funcs.train_rf_model_check_in(df_train)
    # xgboost
    model_xgb = funcs.train_xgb_model_check_in(df_train)

    # Intermediate files written under tmp/; presumably read by the MATLAB
    # script -- TODO confirm against the .m file.
    pgy = funcs.cal_pgy_withoutGridGroup(df_test, cluster_num, grid_list)
    pd.DataFrame(pgy).to_csv(
        'tmp/pgy_check_in_privcheck.csv', index=False, header=None)

    df_test = funcs.update_grid_group(df_test, grid_area_dict)
    JSD_Mat_dict = np.zeros((cluster_num, cluster_num, grid_area_number))
    for gg in range(grid_area_number):
        df_test_gg = df_test.loc[df_test['grid_group'] == gg]
        # NOTE(review): the literal 4 is passed through unchanged; its
        # meaning is defined by funcs.cal_JSD_Matrix_withoutGridGroup.
        JSD_Mat_dict[:, :, gg] = funcs.cal_JSD_Matrix_withoutGridGroup(
            df_test_gg, cluster_num, 4)
    scipy.io.savemat('tmp/JSDM_girdGroup_privcheck.mat',
                     {"JSD_Mat_input_Yang_trueTrain": JSD_Mat_dict})

    eng = matlab.engine.start_matlab()
    try:
        eng.edit("../../matlab/checkin_tradeoff_scenario_I/PrivCheck",
                 nargout=0)
        eng.cd('../../matlab/checkin_tradeoff_scenario_I', nargout=0)
        # Unpack the two MATLAB outputs directly: wrapping the pair in
        # np.array() first fails on recent NumPy when their shapes differ.
        xpgg, _distortion_budget = eng.PrivCheck(deltaX, nargout=2)
    finally:
        # Always shut the engine down so MATLAB processes do not pile up.
        eng.quit()
    xpgg = np.array(xpgg)

    # Reset every row's grid group to 0 before drawing obfuscated samples.
    df_test['grid_group'] = pd.Series(np.zeros(df_test.shape[0]),
                                      index=df_test.index, dtype='int32')

    X_obf_dict = {}
    for k in range(n_obf):
        X_obf_dict[k], _ = funcs.get_obf_X(df_test, xpgg, pp)
    _, X_ori = funcs.get_obf_X(df_test, xpgg, pp)

    # Replace the area entries using the user's grid (second-to-last field
    # of the original record).
    for user in X_ori:
        area = grid_area_dict[X_ori[user][-2]]
        X_ori[user][-3] = area
        for obf in X_obf_dict.values():
            obf[user][-1] = area

    # Return value is not used; the call is kept in case the helper updates
    # the frame in place -- TODO confirm.
    funcs.update_grid_group(df_test_rec_items, grid_area_dict)
    return X_obf_dict, X_ori, model_rf, model_xgb
def PrivCheck(df_train, cluster_num, grid_list, grid_area_dict,
              grid_area_number, area_grid_dict, deltaX, pp, n_obf=25):
    """Build PrivCheck-obfuscated copies of the training data.

    Steps, in order:

    1. Write the ``cal_pgy_withoutGridGroup`` result for ``df_train`` to
       ``tmp/pgy_check_in_privcheck.csv`` and the per-grid-group JSD
       matrices to ``tmp/JSDM_girdGroup_privcheck.mat``.
    2. Run the MATLAB ``PrivCheck`` routine with ``deltaX`` to obtain the
       obfuscation matrix ``xpgg``.
    3. Draw ``n_obf`` obfuscated copies of the data plus the original
       feature dict via ``funcs.get_obf_X_withAgeGroup``, then overwrite
       their area entries from ``grid_area_dict``.

    NOTE(review): this source contains another function that is also named
    ``PrivCheck``. If both live in the same module only the later
    definition is reachable -- confirm they belong to separate files.

    Args:
        df_train: Check-in DataFrame to obfuscate.
        cluster_num: Number of clusters; sets the first two dimensions of
            the JSD matrix.
        grid_list: Grid ids forwarded to ``funcs.cal_pgy_withoutGridGroup``.
        grid_area_dict: Mapping from grid id to area (grid group) id.
        grid_area_number: Number of grid groups; sets the third dimension
            of the JSD matrix.
        area_grid_dict: Unused; kept for interface compatibility.
        deltaX: Argument forwarded to the MATLAB ``PrivCheck`` function.
        pp: Argument forwarded to ``funcs.get_obf_X_withAgeGroup``.
        n_obf: Number of obfuscated copies to generate (default 25, the
            value that used to be hard-coded).

    Returns:
        Tuple ``(X_obf_dict, X_ori)`` where ``X_obf_dict`` maps
        ``0..n_obf-1`` to obfuscated feature dicts and ``X_ori`` is the
        original feature dict.
    """
    # Intermediate files written under tmp/; presumably read by the MATLAB
    # script -- TODO confirm against the .m file.
    pgy = funcs.cal_pgy_withoutGridGroup(df_train, cluster_num, grid_list)
    pd.DataFrame(pgy).to_csv(
        'tmp/pgy_check_in_privcheck.csv', index=False, header=None)

    df_train = funcs.update_grid_group(df_train, grid_area_dict)
    JSD_Mat_dict = np.zeros((cluster_num, cluster_num, grid_area_number))
    for gg in range(grid_area_number):
        df_train_gg = df_train.loc[df_train['grid_group'] == gg]
        # NOTE(review): the literal 4 is passed through unchanged; its
        # meaning is defined by funcs.cal_JSD_Matrix_withoutGridGroup.
        JSD_Mat_dict[:, :, gg] = funcs.cal_JSD_Matrix_withoutGridGroup(
            df_train_gg, cluster_num, 4)
    scipy.io.savemat('tmp/JSDM_girdGroup_privcheck.mat',
                     {"JSD_Mat_input_Yang_allObf": JSD_Mat_dict})

    eng = matlab.engine.start_matlab()
    try:
        eng.edit("../../matlab/checkin_clusternum_scenario_II/PrivCheck",
                 nargout=0)
        eng.cd('../../matlab/checkin_clusternum_scenario_II', nargout=0)
        # Unpack the two MATLAB outputs directly: wrapping the pair in
        # np.array() first fails on recent NumPy when their shapes differ.
        xpgg, _distortion_budget = eng.PrivCheck(deltaX, nargout=2)
    finally:
        # Always shut the engine down so MATLAB processes do not pile up.
        eng.quit()
    xpgg = np.array(xpgg)

    # Second grid-group update kept from the original flow; it looks
    # redundant with the call above -- TODO confirm before removing.
    df_train = funcs.update_grid_group(df_train, grid_area_dict)

    X_obf_dict = {}
    for k in range(n_obf):
        X_obf_dict[k], _ = funcs.get_obf_X_withAgeGroup(df_train, xpgg, pp)
    _, X_ori = funcs.get_obf_X_withAgeGroup(df_train, xpgg, pp)

    # Replace the area entries using the user's grid (second-to-last field
    # of the original record).
    for user in X_ori:
        area = grid_area_dict[X_ori[user][-2]]
        X_ori[user][-3] = area
        for obf in X_obf_dict.values():
            obf[user][-1] = area

    return X_obf_dict, X_ori