# Seed from r so the shuffle below is repeatable for a given r.
random.seed(r*10)

# Build the list of item labels: everything iterating `df` yields, minus the
# three bookkeeping labels. `remove` raises ValueError if a label is absent.
items = list(df)
for bookkeeping_label in ('grid', 'grid_group', 'uid'):
    items.remove(bookkeeping_label)
random.shuffle(items)

# First 80% (rounded down) of the shuffled items are the training items; the
# remainder are the test items.
train_item_count = int(len(items) * 0.8)
items_train = items[:train_item_count]
items_test = list(set(items) - set(items_train))

# Each frame is df_cluster with the *other* split's items dropped along axis 1.
# NOTE(review): the item list is taken from `df` but the drop is applied to
# `df_cluster` -- presumably both share the same item columns; confirm.
df_train = df_cluster.drop(items_test, axis=1)
df_test = df_cluster.drop(items_train, axis=1)

# One zero-argument callable per supported method. Lambdas keep the argument
# lookup lazy, so names used by only one method (beta, gamma, p_rand, ...) are
# touched only when that method is actually selected.
obfuscation_calls = {
    'HyObscure': lambda: obfuscations.HyObscure(
        df_train, grid_area_dict, area_grid_dict, cluster_num,
        grid_area_number, grid_list, area_reducibility,
        area_grid_rowcol_dict, area_grid_colrow_dict, method,
        grid_rowcol, grid_colrow, l_threshold, k_threshold, deltaX, pp),
    'YGen': lambda: obfuscations.YGen(
        df_train, grid_area_dict, grid_area_number, cluster_num, grid_list,
        area_grid_dict, deltaX, area_reducibility, area_grid_rowcol_dict,
        area_grid_colrow_dict, method, grid_rowcol, grid_colrow,
        l_threshold, k_threshold, pp),
    'XObf': lambda: obfuscations.XObf(
        df_train, cluster_num, grid_area_number, grid_list, grid_area_dict,
        area_grid_dict, deltaX, pp, method),
    'PrivCheck': lambda: obfuscations.PrivCheck(
        df_train, cluster_num, grid_list, grid_area_dict, grid_area_number,
        area_grid_dict, deltaX, pp),
    'DP': lambda: obfuscations.differential_privacy(
        df_train, grid_area_dict, grid_area_number, beta),
    'Frapp': lambda: obfuscations.Frapp(df_train, grid_area_dict, gamma),
    'Random': lambda: obfuscations.Random(df_train, grid_area_dict, p_rand),
}

# An unrecognised method assigns nothing: the four result names keep whatever
# value (if any) they had before this point.
run_obfuscation = obfuscation_calls.get(method)
if run_obfuscation is not None:
    X_obf_dict, X_ori, model_rf, model_xgb = run_obfuscation()
# Called for its effect only; the return value is not used here.
funcs.update_age_group(df_cluster, age_group_dict)

# Seed from r so the shuffle below is repeatable for a given r.
random.seed(r * 10)

# Build the list of item labels: everything iterating `df_item_age_uid`
# yields, minus the three bookkeeping labels. `remove` raises ValueError if a
# label is absent.
items = list(df_item_age_uid)
for bookkeeping_label in ('age', 'age_group', 'uid'):
    items.remove(bookkeeping_label)
random.shuffle(items)

# First 80% (rounded down) of the shuffled items are the training items; the
# remainder are the test items.
train_item_count = int(len(items) * 0.8)
items_train = items[:train_item_count]
items_test = list(set(items) - set(items_train))

# Each frame is df_cluster with the *other* split's items dropped along axis 1.
# NOTE(review): the item list is taken from `df_item_age_uid` but the drop is
# applied to `df_cluster` -- presumably both share the same item columns;
# confirm.
df_train = df_cluster.drop(items_test, axis=1)
df_test = df_cluster.drop(items_train, axis=1)

# One zero-argument callable per supported method; arguments are looked up
# only when the selected callable is invoked.
obfuscation_calls = {
    'HyObscure': lambda: obfuscations.HyObscure(
        cluster_num, age_group_number, age_group_dict, group_age_dict,
        age_list, deltaX, k_threshold, l_threshold, df_train,
        df_item_age_uid, pp),
    'YGen': lambda: obfuscations.YGen(
        df_train, age_group_number, cluster_num, age_list, age_group_dict,
        group_age_dict, df_item_age_uid, deltaX, k_threshold, l_threshold,
        pp),
    'XObf': lambda: obfuscations.XObf(
        deltaX, cluster_num, age_group_number, age_list, group_age_dict,
        df_train, pp),
    'PrivCheck': lambda: obfuscations.PrivCheck(
        deltaX, cluster_num, age_group_number, df_cluster, df_train,
        age_list, age_group_dict, group_age_dict, pp),
}

# An unrecognised method assigns nothing: X_obf_dict and X_ori keep whatever
# value (if any) they had before this point.
run_obfuscation = obfuscation_calls.get(method)
if run_obfuscation is not None:
    X_obf_dict, X_ori = run_obfuscation()
if cluster_flag == 1: continue df_test_rec_items = df_test_items.loc[test_idx_list] df_test_rec_items = df_test_rec_items.reset_index(drop=True) print("all users num: {}".format(len(df_cluster))) print("split train and test over") print("train num {}".format(len(df_train))) print("test num {}".format(len(df_test))) print("train items {}".format(df_train_items.shape[1])) print("test items {}".format(df_test_items.shape[1])) if method == 'HyObscure': X_obf_dict, X_ori, model_rf, model_xgb = obfuscations.HyObscure( deltaX, grid_area_number, cluster_num, k_threshold, l_threshold, df_test, grid_list, area_reducibility, grid_area_dict, area_grid_dict, area_grid_colrow_dict, area_grid_rowcol_dict, grid_colrow, grid_rowcol, df_train, df_test_rec_items, pp, method) elif method == 'PrivCheck': X_obf_dict, X_ori, model_rf, model_xgb = obfuscations.PrivCheck( df_train, df_test, df_test_rec_items, grid_area_dict, area_grid_dict, grid_list, cluster_num, grid_area_number, deltaX, pp) else: print('Method error. Check method setting.') break acc_oris_rf = [] acc_obfs_rf = [] acc_oris_xgb = [] acc_obfs_xgb = []
df_test = df_test.reset_index(drop=True) df_test_rec_items = df_test_items.loc[test_idx_list] df_test_rec_items = df_test_rec_items.reset_index(drop=True) print("all users num: {}".format(len(df_item_ageGroup_uid))) print("split train and test over") print("train num {}".format(len(df_train))) print("test num {}".format(len(df_test))) print("train items {}".format(df_train_items.shape[1])) print("test items {}".format(df_test_items.shape[1])) if method == 'HyObscure': X_obf_dict, X_ori, model_rf, model_xgb = obfuscations.HyObscure( df_train, df_test, df_test_rec_items, df_item_age_uid, age_group_dict, group_age_dict, cluster_num, age_group_number, age_list, deltaX, k_threshold, l_threshold, pp) elif method == 'PrivCheck': X_obf_dict, X_ori, model_rf, model_xgb = obfuscations.PrivCheck( df_train, df_test, df_test_rec_items, age_group_number, cluster_num, deltaX, age_list, age_group_dict, group_age_dict, pp) else: print('Method error. Check method setting.') break mae_oris_rf = [] mae_obfs_rf = [] mae_oris_xgb = [] mae_obfs_xgb = []