def load(data_name, s_id, npp_params, clean=True, physical=False, partial=None, downsample=True):
    """Load one subject's EEG data, generating the .mat file on first use.

    Parameters
    ----------
    data_name : str
        Dataset name: 'ERN', 'MI' or 'P300'.
    s_id : int
        Subject id; selects ``s{s_id}.mat`` inside the dataset folder.
    npp_params : sequence of length 3
        Narrow-period-pulse (trigger) parameters; embedded in the folder
        name of poisoned data.
    clean : bool
        If True, load the clean data; otherwise load poisoned data.
    physical : bool
        For poisoned data, load the physically-poisoned variant.
    partial :
        If truthy, load the partially-poisoned variant generated with
        this partial rate (overrides the plain poisoned path).
    downsample : bool
        If True, randomly subsample so both classes have the same
        number of trials.

    Returns
    -------
    tuple
        ``(eeg, x, y)`` — raw EEG array, feature array, 1-D label array.

    Raises
    ------
    ValueError
        If the data folder is missing and ``data_name`` is not one of
        the three known datasets (previously this fell through to an
        opaque ``loadmat`` failure).
    """
    if clean:
        path = 'EEG_Data/' + data_name + '/clean'
    else:
        # Both poisoned variants share the same suffix format.
        variant = 'physical-poisoned' if physical else 'poisoned'
        path = 'EEG_Data/' + data_name + '/{}-{}-{}-{}'.format(
            variant, npp_params[0], npp_params[1], npp_params[2])
        # NOTE(review): assumed the `partial` override applies only to
        # poisoned loads — confirm against the original indentation.
        if partial:
            path = 'EEG_Data/' + data_name + '/partial-{}_poisoned-{}-{}-{}'.format(
                partial, npp_params[0], npp_params[1], npp_params[2])

    # Generate the dataset the first time it is requested.
    if not os.path.exists(path):
        if data_name == 'ERN':
            ERNDataget.get(npp_params, clean, physical, partial)
        elif data_name == 'MI':
            MIDataget.get(npp_params, clean, physical, partial)
        elif data_name == 'P300':
            P300Dataget.get(npp_params, clean, physical, partial)
        else:
            raise ValueError('unknown dataset: {}'.format(data_name))

    data = loadmat(path + '/s{}.mat'.format(s_id))
    eeg = data['eeg']
    x = data['x']
    # loadmat returns 2-D label arrays; flatten to a 1-D vector.
    y = np.squeeze(data['y'].flatten())

    if downsample:
        # Balance the classes: randomly keep `n` trials of each,
        # where `n` is the minority-class count.
        x0 = x[y == 0]
        x1 = x[y == 1]
        n = min(len(x0), len(x1))
        idx0, idx1 = utils.shuffle_data(len(x0)), utils.shuffle_data(len(x1))
        x = np.concatenate([x0[idx0[:n]], x1[idx1[:n]]], axis=0)
        y = np.concatenate(
            [np.zeros(shape=[n]), np.ones(shape=[n])], axis=0)
    return eeg, x, y
downsample=True) x_train = np.concatenate((x_train, x_i), axis=0) y_train = np.concatenate((y_train, y_i), axis=0) x_train, y_train, x_validation, y_validation = utils.split_data( [x_train, y_train], split=0.8, shuffle=True) # create poison data _, x_p, y_p = load(data_name, s_id[0], npp_params, clean=False, physical=True, downsample=False) idx = utils.shuffle_data(len(x_p)) x_poison, y_poison = x_p[idx[:int(poison_rate * len(x_train))]], y_p[ idx[:int(poison_rate * len(x_train))]] y_poison = np.ones(shape=y_poison.shape) # add the poison data to train data x_train = np.concatenate((x_train, x_poison), axis=0) y_train = np.concatenate((y_train, y_poison), axis=0) _, x_test, y_test = load(data_name, s_id[s], npp_params, clean=True,
labels = labels.flatten() e = utils.standard_normalize(e) x = utils.standard_normalize(x) if Ek_cl == 0: # 解决concatenate无法拼接空数组的问题 X_cl = x Y_cl = labels Ek_cl = 1 else: X_cl = np.concatenate((X_cl, x), axis=0) Y_cl = np.concatenate((Y_cl, labels), axis=0) x1 = X_cl[np.where(Y_cl == 0)] # 消除类别不平衡的问题 x2 = X_cl[np.where(Y_cl == 1)] sample_num = min(len(x1), len(x2)) idx1, idx2 = utils.shuffle_data(len(x1)), utils.shuffle_data(len(x2)) X_cl = np.concatenate([x1[idx1[:sample_num]], x2[idx2[:sample_num]]], axis=0) Y_cl = np.concatenate( [np.zeros(shape=[sample_num]), np.ones(shape=[sample_num])], axis=0) for s in tqdm(range(3)): x = [] e = [] labels = [] clean = False flag = True for session in range(4): data_names = os.listdir(data_file.format(s + 1, session + 1)) for data_name in data_names:
x_train, y_train, _, _ = utils.split_data([x_train, y_train], split=0.2, shuffle=True) x_validation, y_validation, _, _ = utils.split_data( [x_validation, y_validation], split=0.5, shuffle=True) if not baseline: #若不是baseline,加上污染(后门)数据 x_train = np.concatenate((x_train, x_poison), axis=0) y_train = np.concatenate((y_train, y_poison), axis=0) # if POI: # x_train = np.concatenate((x_train, x_POI), axis=0) # y_train = np.concatenate((y_train, y_POI), axis=0) data_size = y_train.shape[0] #打乱数据顺序,重新排序 shuffle_index = utils.shuffle_data(data_size) x_train = x_train[shuffle_index] y_train = y_train[shuffle_index] print(x_train.shape) nb_classes = len(np.unique( y_train)) # np.unique是去除重复数字然后排序输出,这样输出它的长度表明总共的分类数-可尝试增加分类数看结果的变化 samples = x_train.shape[3] channels = x_train.shape[2] racc = [] rbca = [] rasr = [] pruning_idx = [30, 60, 180] #CNN剪枝的参数 for i in range(repeat):