def _append_transformed_tokens(self, historical_taa_set, candidate_taas, transformed_tokens):
    """Register one candidate adversarial token sequence, de-duplicated by text.

    The de-duplication key is the preprocessed concatenation of the tokens.
    Sequences that are empty, already seen historically, or already queued as
    candidates are silently ignored.
    """
    if not transformed_tokens:
        return
    key = preprocess_text(''.join(transformed_tokens))
    if key in historical_taa_set or key in candidate_taas:
        return
    candidate_taas[key] = transformed_tokens
    historical_taa_set.add(key)
def __init__(self, chinese_chars_file, N=5):
    """Pronunciation transform: substitute a char with other chars that share its pinyin.

    Builds two lookup tables from the character file: ``pinyin_dict`` keyed by
    toned pinyin and ``lazy_pinyin_dict`` keyed by tone-less pinyin.
    """
    super().__init__()
    self.lazy_pinyin_dict = defaultdict(list)
    self.pinyin_dict = defaultdict(list)
    self.N = N
    seen_chars = set()
    with open(chinese_chars_file, encoding='utf-8') as f:
        for raw_line in f:
            char = preprocess_text(raw_line.strip())
            if char in seen_chars:
                continue
            seen_chars.add(char)
            toned = pinyin(char)
            if toned:
                self.pinyin_dict[toned[0][-1]].append(char)
            toneless = lazy_pinyin(char)
            if toneless:
                self.lazy_pinyin_dict[toneless[-1]].append(char)
def attack(self, raw_texts, rounds=5, topK=5):
    """Run an iterative transform-and-select attack over each raw text.

    For every text: seed a candidate pool with whole-sentence transforms, then
    for `rounds` iterations mutate candidates (genetic crossover plus a battery
    of single-point character transforms), score them against the defence
    model, and keep the top-K as seeds for the next round.

    Returns (transformed_texts, local_scores): the best adversarial text and
    its score, one per input text.
    """
    print('Round:', rounds, 'TopK:', topK)
    local_scores = []
    transformed_texts = []
    for raw_text in tqdm(raw_texts):
        best_score = 0.0
        raw_tokens = self.tokenizer(raw_text)
        best_transformed_text = raw_text
        best_transformed_tokens = raw_tokens
        preprocessed_raw_text = preprocess_text(''.join(raw_tokens))
        # Track every text ever generated (to avoid re-scoring duplicates) and
        # the current candidate pool keyed by preprocessed text.
        historical_taa_set = {preprocessed_raw_text}
        candidate_taas = {preprocessed_raw_text: raw_tokens}

        ##############################################################
        ### Global transform: replace the whole sentence, then use the
        ### results as seeds for the iterative search.
        ##############################################################
        ## 1. Brute-force whole-sentence replacement.
        for _ in range(3):  # 3 vs 5 makes no real difference, just slightly slower
            self._append_transformed_tokens(
                historical_taa_set, candidate_taas,
                self.homonymic_transform.global_transform(
                    raw_tokens))  # replaces all abusive keywords

        ## 2. Random whole-sentence replacement, sampling positions weighted
        ##    by each token's transform score.
        indices_probs = [
            self.transform_dict[token]['scores']
            if token in self.transform_dict else 0.0
            for token in raw_tokens
        ]
        indices_probs_sum = 0
        valid_cnt = 0  # number of tokens with a positive transform score
        for prob in indices_probs:
            indices_probs_sum += prob
            valid_cnt += int(prob > 0)
        if indices_probs_sum > 0:
            indices_probs = [
                prob / indices_probs_sum for prob in indices_probs
            ]
        for round in range(1):  # increasing this round count does not actually help
            # Replace 1..valid_cnt positions at a time, picking each
            # replacement token from its transform distribution.
            for i in range(1, valid_cnt + 1):
                indices = np.random.choice(len(raw_tokens),
                                           i,
                                           replace=False,
                                           p=indices_probs)
                new_tokens = raw_tokens[:]
                for idx in indices:
                    target_token = new_tokens[idx]
                    tsf_tokens = self.transform_dict[target_token][
                        'transform_tokens']
                    tsf_token_probs = self.transform_dict[
                        target_token]['transform_probs']
                    tsf_idx = np.random.choice(len(tsf_token_probs),
                                               1,
                                               p=tsf_token_probs)[0]
                    new_tokens[idx] = tsf_tokens[tsf_idx]
                self._append_transformed_tokens(historical_taa_set,
                                                candidate_taas, new_tokens)

        # NOTE(review): a commented-out early top-K selection pass (duplicate
        # of the selection logic at the bottom of the round loop) was removed
        # here.

        for round in range(rounds):
            cur_tokens_list = [
                candidate_taas[text] for text in candidate_taas
            ]
            for tokens_idx, tokens in enumerate(cur_tokens_list):
                if len(tokens) == 0:
                    continue

                ## Genetic attack: copy half the positions from another
                ## same-length candidate into this one.
                for other_tokens_idx, other_tokens in enumerate(
                        cur_tokens_list):
                    if other_tokens_idx == tokens_idx or len(
                            tokens) != len(other_tokens):
                        continue
                    new_tokens = tokens[:]
                    target_token_indices = np.random.choice(
                        len(other_tokens),
                        len(other_tokens) // 2,
                        replace=False)
                    for idx in target_token_indices:
                        if idx < len(new_tokens):
                            new_tokens[idx] = other_tokens[idx]
                    self._append_transformed_tokens(
                        historical_taa_set, candidate_taas, new_tokens)
                pass

                # Single-point transforms at one uniformly random position.
                idx = random.randint(0, len(tokens) - 1)  # FIXME: replace this random attack
                if is_alpha(tokens[idx]) and len(tokens[idx]) >= 4:
                    # Character-order swap only makes sense for longer
                    # alphabetic tokens.
                    self._append_transformed_tokens(
                        historical_taa_set, candidate_taas,
                        self.char_swap_transform(tokens, idx))
                self._append_transformed_tokens(
                    historical_taa_set, candidate_taas,
                    self.add_transform(tokens, idx))
                self._append_transformed_tokens(
                    historical_taa_set, candidate_taas,
                    self.radical_transform(tokens, idx))  # beware chars without left-right structure, e.g. 死, 司
                self._append_transformed_tokens(
                    historical_taa_set, candidate_taas,
                    self.hxw_transform(tokens, idx))
                self._append_transformed_tokens(
                    historical_taa_set, candidate_taas,
                    self.hxw_radical_transform(tokens, idx))
                self._append_transformed_tokens(
                    historical_taa_set, candidate_taas,
                    self.radical_chardrop_transform(tokens, idx))
                self._append_transformed_tokens(
                    historical_taa_set, candidate_taas,
                    self.hxw_radical_chardroptransform(tokens, idx))
                # NOTE(review): token-level swap transform was tried and
                # disabled (commented out) — "word-level swap is garbage".
                self._append_transformed_tokens(
                    historical_taa_set, candidate_taas,
                    self.phonetic_char_swap_transform(tokens, idx))
                # NOTE(review): a commented-out pronunciation-transform
                # special case (per-char homophone substitution) was removed
                # here; the active variant lives in generate_taa_samples.
                self._append_transformed_tokens(
                    historical_taa_set, candidate_taas,
                    rule_based_transform(tokens, self.transform_dict))

            # Pick the K strongest attack samples as seeds for the next round.
            cur_transformed_texts = []
            cur_transformed_tokens = []
            for text in candidate_taas:
                cur_transformed_texts.append(text)
                cur_transformed_tokens.append(candidate_taas[text])
            ref_texts = [raw_text] * len(cur_transformed_texts)
            soft_scores, hard_scores = self.performance_evaluator.calc_final_score(
                ref_texts, cur_transformed_texts, show_details=False)
            sorted_eval_scores = sorted(enumerate(soft_scores),
                                        key=lambda d: d[1],
                                        reverse=True)[:topK]
            if sorted_eval_scores[0][1] > best_score:
                # New global best: restart the pool from scratch (the top-K
                # below repopulates it).
                best_score = sorted_eval_scores[0][1]
                best_transformed_text = cur_transformed_texts[
                    sorted_eval_scores[0][0]]
                best_transformed_tokens = cur_transformed_tokens[
                    sorted_eval_scores[0][0]]
                candidate_taas = {}
            else:
                # No improvement: keep the historical best in the pool.
                candidate_taas = {
                    best_transformed_text: best_transformed_tokens
                }
            for idx, score in sorted_eval_scores:
                candidate_taas[cur_transformed_texts[
                    idx]] = cur_transformed_tokens[idx]

        transformed_texts.append(best_transformed_text)
        local_scores.append(best_score)
    return transformed_texts, local_scores
def generate_taa_samples(self, raw_texts, group_ids, rounds=5, topK=5):
    """Generate adversarial training samples for each (text, group_id) pair.

    Same search loop as `attack`, but instead of returning only the single
    best text, it collects the per-round best plus the entire final-round
    pool, and replicates each text's group_id so the output lists stay
    aligned.

    Returns (transformed_texts, new_group_ids).
    """
    transformed_texts = []
    new_group_ids = []
    for raw_text, group_id in tqdm(zip(raw_texts, group_ids),
                                   total=len(raw_texts)):
        # A group_id may be an int label (1 == obscene) or a string id
        # prefixed with 'obs' for obscene samples.
        if isinstance(group_id, int):
            is_obs = (group_id == 1)
        else:
            is_obs = group_id.startswith('obs')
        texts_to_add = set()
        raw_tokens = self.tokenizer(raw_text)
        preprocessed_raw_text = preprocess_text(''.join(raw_tokens))
        historical_taa_set = {preprocessed_raw_text}
        candidate_taas = {preprocessed_raw_text: raw_tokens}
        for round in range(rounds):
            cur_tokens_list = [candidate_taas[text] for text in candidate_taas]
            for tokens_idx, tokens in enumerate(cur_tokens_list):
                if len(tokens) == 0:
                    continue
                ## Genetic attack: copy half the positions from another
                ## same-length candidate into this one.
                for other_tokens_idx, other_tokens in enumerate(
                        cur_tokens_list):
                    if other_tokens_idx == tokens_idx or len(tokens) != len(
                            other_tokens):
                        continue
                    new_tokens = tokens[:]
                    target_token_indices = np.random.choice(
                        len(other_tokens),
                        len(other_tokens) // 2,
                        replace=False)
                    for idx in target_token_indices:
                        if idx < len(new_tokens):
                            new_tokens[idx] = other_tokens[idx]
                    self._append_transformed_tokens(historical_taa_set,
                                                    candidate_taas,
                                                    new_tokens)
                # Single-point transforms at one uniformly random position.
                idx = random.randint(0, len(tokens) - 1)  # FIXME: replace this random attack
                if is_alpha(tokens[idx]) and len(tokens[idx]) >= 4:
                    # Character-order swap only for longer alphabetic tokens.
                    self._append_transformed_tokens(
                        historical_taa_set, candidate_taas,
                        self.char_swap_transform(tokens, idx))
                self._append_transformed_tokens(
                    historical_taa_set, candidate_taas,
                    self.add_transform(tokens, idx))
                self._append_transformed_tokens(
                    historical_taa_set, candidate_taas,
                    self.token_drop_transform(tokens, idx))
                self._append_transformed_tokens(
                    historical_taa_set, candidate_taas,
                    self.token_swap_transform(tokens, idx))  # word-level swap is weak
                self._append_transformed_tokens(
                    historical_taa_set, candidate_taas,
                    self.radical_transform(tokens, idx))  # beware chars without left-right structure, e.g. 死, 司
                self._append_transformed_tokens(
                    historical_taa_set, candidate_taas,
                    self.phonetic_char_swap_transform(tokens, idx))
                self._append_transformed_tokens(
                    historical_taa_set,
                    candidate_taas, self.hxw_transform(tokens, idx))

                # FIXME: this is a special-cased sub-step of the workflow —
                # rebuild the token at `idx` by substituting each of its chars
                # with the first differing homophone candidate.
                candidates_list = self.pronunciation_transform(tokens, idx,
                                                               N=None)
                transformed_tokens = tokens[:idx]
                new_token_chars = []
                for raw_char, candidates in zip(tokens[idx], candidates_list):
                    for candidate in candidates:
                        if candidate != raw_char:
                            new_token_chars.append(candidate)
                            break
                if len(new_token_chars) > 0:
                    new_token = ''.join(new_token_chars)
                else:
                    new_token = ''
                transformed_tokens.append(new_token)
                transformed_tokens += tokens[idx + 1:]
                self._append_transformed_tokens(historical_taa_set,
                                                candidate_taas,
                                                transformed_tokens)

            # Pick the K strongest samples as seeds for the next round.
            cur_transformed_texts = []
            cur_transformed_tokens = []
            for text in candidate_taas:
                cur_transformed_texts.append(text)
                cur_transformed_tokens.append(candidate_taas[text])
            ref_texts = [raw_text] * len(cur_transformed_texts)
            # NOTE(review): `hasrd_scores` is a typo for `hard_scores`; the
            # value is unused so it is kept verbatim here.
            soft_scores, hasrd_scores = self.performance_evaluator.calc_final_score(
                ref_texts,
                cur_transformed_texts,
                show_details=False,
                is_obs=is_obs)
            sorted_eval_scores = sorted(enumerate(soft_scores),
                                        key=lambda d: d[1],
                                        reverse=True)[:topK]
            candidate_taas = {}
            for idx, score in sorted_eval_scores:
                candidate_taas[
                    cur_transformed_texts[idx]] = cur_transformed_tokens[idx]
            texts_to_add.add(cur_transformed_texts[sorted_eval_scores[0]
                                                   [0]])  # add each round's best

        # After the last round, add the whole remaining pool.
        texts_to_add |= set(cur_transformed_texts)
        transformed_texts.extend(list(texts_to_add))
        new_group_ids.extend([group_id] * len(texts_to_add))
    return transformed_texts, new_group_ids
def attack(self, raw_texts, rounds=5, topK=5, debug=False, kw_freq_thres=20.0):
    """Keyword-frequency-guided iterative attack over each raw text.

    Differences from the simpler variant: position sampling is biased by a
    keyword-frequency dictionary (or by kw_identification scores for rarely
    seen texts), final scores are down-weighted for high-frequency tokens to
    evade an online automatic defence, and per-transform contribution stats
    are printed when debug=True.

    Returns (transformed_texts, local_scores).
    """
    print('Round:', rounds, 'TopK:', topK)
    local_scores = []
    transformed_texts = []
    for i_text, raw_text in tqdm(enumerate(raw_texts), total=len(raw_texts)):
        best_score = 0.0
        raw_tokens = self.tokenizer(raw_text)
        # Bump the global keyword-frequency counts for every token seen.
        kw_freqs = []
        for token in raw_tokens:
            if token not in self.kw_freq_dict:
                self.kw_freq_dict[token] = 0
            self.kw_freq_dict[token] += 5
            kw_freqs.append(self.kw_freq_dict[token])
        self.local_kw_freq_dict = self.kw_freq_dict.copy(
        )  # local copy of the global dict; using it for high-frequency queries avoids the heavy cost of cross-process synchronization
        mean_freq = np.mean(kw_freqs)
        best_transformed_text = raw_text
        best_transformed_tokens = raw_tokens
        ## TODO: could switch to kw-idf mode whenever too few of the tokens
        ## have been seen before (mean frequency below the threshold).
        if mean_freq < kw_freq_thres:
            kw_scores = self.kw_identification(raw_tokens, len(raw_tokens))
            kw_scores = [score for _, score in kw_scores]
        preprocessed_raw_text = preprocess_text(''.join(raw_tokens))
        historical_taas = {preprocessed_raw_text}
        candidate_taas = {}

        ##############################################################
        ### Global transform: replace the whole sentence, then use the
        ### results as seeds.
        ##############################################################
        # Replace all abusive keywords.
        for transform in self.global_transforms:
            for i in range(topK):  # larger values work slightly better
                self._append_transformed_tokens(
                    historical_taas, candidate_taas,
                    transform.global_transform(raw_tokens))
        if len(candidate_taas) == 0:
            candidate_taas = {preprocessed_raw_text: raw_tokens}

        cur_rounds = rounds  # rounds for this text, adjusted by its length
        if len(raw_tokens) < 50:  # 30 is safe, 50 is uncertain
            cur_rounds = int(cur_rounds * (1.5 - 0.1 * len(raw_tokens) // 10))

        for round in range(1, cur_rounds + 1):
            cur_tokens_list = [
                candidate_taas[text] for text in candidate_taas
            ]
            for tokens_idx, tokens in enumerate(cur_tokens_list):
                if len(tokens) == 0:
                    continue
                # NOTE(review): two disabled genetic strategies were removed
                # here (commented out in the original): a brute-force
                # multi-point crossover ("looks worse by eye but stronger
                # online") and a single-cut crossover ("stronger offline /
                # by eye but much worse online").
                pass
                pass

                # On odd rounds, bias the position distribution by keyword
                # importance; on even rounds sample uniformly (idx_probs=None).
                idx_probs = None
                if round % 2:
                    try:
                        if mean_freq < kw_freq_thres:
                            freqs = kw_scores
                            # add/drop transforms may shift indices; ignored for now
                            freqs = freqs[:len(tokens)]
                            freqs += [0] * (len(tokens) - len(freqs))
                            freqs = np.array(freqs)
                            freqs = freqs - freqs.min() + 0.01
                        else:
                            # FIXME: could switch to local_kw here for speed
                            # if it becomes necessary.
                            freqs = np.array([
                                self.kw_freq_dict[token]
                                if token in self.kw_freq_dict else 1
                                for token in tokens
                            ])
                        idx_probs = freqs / freqs.sum()
                    except:
                        pass
                idx = np.random.choice(list(range(len(tokens))),
                                       1,
                                       p=idx_probs)[0]  # targeted single-point attack on keywords
                indices = np.random.choice(list(range(len(tokens))),
                                           min(3, len(tokens)),
                                           p=idx_probs)  # batch replacement positions

                ## Single-point replacement.
                if is_alpha(tokens[idx]) and len(tokens[idx]) >= 4:
                    for transform in self.alpha_transforms:
                        self._append_transformed_tokens(
                            historical_taas, candidate_taas,
                            transform(tokens, idx))
                for transform in self.multi_rounds_transforms:
                    for _ in range(3):
                        self._append_transformed_tokens(
                            historical_taas, candidate_taas,
                            transform(tokens, idx))
                for transform in self.random_transforms:
                    self._append_transformed_tokens(
                        historical_taas, candidate_taas,
                        transform(tokens, idx))
                for transform in self.fixed_transforms:
                    self._append_transformed_tokens(
                        historical_taas, candidate_taas,
                        transform(tokens, idx))

                ## Batch replacement: mainly for pinyin/add-style transforms
                ## that barely hurt readability, offsetting their disadvantage
                ## on the Jaccard metric.
                indices = sorted(indices, reverse=True)  # descending, so add-type transforms don't shift later indices
                for transform in self.multi_ptr_transforms:
                    self._append_transformed_tokens(
                        historical_taas, candidate_taas,
                        transform.multi_ptr_trans(tokens, indices))

            # Pick the K strongest attack samples as seeds for the next round.
            cur_transformed_texts = []
            cur_transformed_tokens = []
            for text in candidate_taas:
                cur_transformed_texts.append(text)
                cur_transformed_tokens.append(candidate_taas[text])
            ref_texts = [raw_text] * len(cur_transformed_texts)
            soft_scores, hard_scores = self.performance_evaluator.calc_final_score(
                ref_texts, cur_transformed_texts, show_details=False)

            ## Frequency-weighted final score; this strategy counters the
            ## online automatic defence mechanism.
            freqs = np.array([
                sum([
                    self.local_kw_freq_dict[token]
                    if token in self.local_kw_freq_dict else 1
                    for token in tokens
                ]) for tokens in cur_transformed_tokens
            ])
            # NOTE(review): if every candidate has the same summed frequency
            # this divides by zero and yields NaN weights — confirm inputs
            # always vary.
            freq_weights = (freqs - freqs.min()) / (freqs.max() - freqs.min())
            freq_weights = 1.0 - 0.2 * freq_weights
            soft_scores *= freq_weights
            sorted_eval_scores = sorted(enumerate(soft_scores),
                                        key=lambda d: d[1],
                                        reverse=True)
            if sorted_eval_scores[0][1] > best_score:
                best_score = sorted_eval_scores[0][1]
                best_transformed_text = cur_transformed_texts[
                    sorted_eval_scores[0][0]]
                best_transformed_tokens = cur_transformed_tokens[
                    sorted_eval_scores[0][0]]
                candidate_taas = {}
            else:
                candidate_taas = {
                    best_transformed_text: best_transformed_tokens
                }
            for idx, score in sorted_eval_scores[:topK]:
                candidate_taas[cur_transformed_texts[
                    idx]] = cur_transformed_tokens[idx]
            # NOTE(review): a disabled extra step that randomly injected two
            # low-scoring candidates for diversity was removed here ("totally
            # useless online").
            pass

        # Reward the tokens of the winning text so future position sampling
        # favours them less/more via the shared frequency dictionary.
        for token in best_transformed_tokens:
            if token not in self.kw_freq_dict:
                self.kw_freq_dict[token] = 0
            self.kw_freq_dict[token] += 2
        transformed_texts.append(best_transformed_text)
        local_scores.append(best_score)

        if debug:
            ## Compute per-transform contribution statistics.
            for transform in self.transforms:
                tokens_list = transform.transformed_tokens
                if not tokens_list:
                    continue
                cur_transformed_texts = list(
                    set([
                        preprocess_text(''.join(tokens))
                        for tokens in tokens_list
                    ]))
                ref_texts = [raw_text] * len(cur_transformed_texts)
                soft_scores, hard_scores = self.performance_evaluator.calc_final_score(
                    ref_texts, cur_transformed_texts, show_details=False)
                transform.mean_scores.append(np.mean(soft_scores))
                transform.max_scores.append(np.max(soft_scores))
                transform.clear()

    if debug:
        # Print three leaderboards ranking the transforms by their recorded
        # scores: mean-of-mean, mean-of-max, and max-of-max.
        print('-' * 80)
        print('Mean of Mean scores:')
        print('-' * 80)
        score_records = []
        for transform in self.transforms:
            scores = transform.mean_scores
            score = 0
            if scores:
                score = np.mean(scores)
            score_records.append((transform, score), )
        score_records = sorted(score_records,
                               key=lambda d: d[1],
                               reverse=True)
        for k, v in score_records:
            print(k, v)
        print('-' * 80)
        print('Mean of Max scores:')
        print('-' * 80)
        score_records = []
        for transform in self.transforms:
            scores = transform.max_scores
            score = 0
            if scores:
                score = np.mean(scores)
            score_records.append((transform, score), )
        score_records = sorted(score_records,
                               key=lambda d: d[1],
                               reverse=True)
        for k, v in score_records:
            print(k, v)
        print('-' * 80)
        print('Max of Max scores:')
        print('-' * 80)
        score_records = []
        for transform in self.transforms:
            scores = transform.max_scores
            score = 0
            if scores:
                score = np.max(scores)
            score_records.append((transform, score), )
        score_records = sorted(score_records,
                               key=lambda d: d[1],
                               reverse=True)
        for k, v in score_records:
            print(k, v)
    return transformed_texts, local_scores
import fasttext
import jieba
from materials.preprocessing_module import preprocess_text

# Score every line of the corpus with the fasttext obscenity classifier and
# write "text<TAB>obscenity_probability" to corpus_output.txt.
model_path = '../data/materials/mini.ftz'
model = fasttext.load_model(model_path)
# Maps fasttext labels to 0/1 (1 == obscene).
is_obscenity_dict = {'__label__0': 0, '__label__1': 1}

# input_path = '../data/obscenities.txt'
input_path = '../data/corpus.txt'  # renamed from `file` to avoid shadowing the builtin

# Use context managers for BOTH handles so the output file is flushed and
# closed even if prediction raises (the original left `wf` unclosed on error).
with open('corpus_output.txt', 'w', encoding='utf-8') as wf, \
        open(input_path, encoding='utf-8') as f:
    for line in f:
        text = preprocess_text(line.strip())
        # fasttext expects whitespace-separated tokens; use jieba segmentation.
        text = ' '.join(jieba.cut(text))
        (lbl,), (score,) = model.predict(text)
        if not is_obscenity_dict[lbl]:
            # Predicted non-obscene: convert confidence to obscenity probability.
            score = 1 - score
        wf.write('%s\t%.6f\n' % (text, score))
def _preprocess(self, texts):
    """Return a new list with `preprocess_text` applied to each input string."""
    return list(map(preprocess_text, texts))
for cur_idx, (raw_idx, line) in enumerate(inp_lines) } inp_lines = [line for idx, line in inp_lines] with mp.Pool(processes=n_cpu) as p: samples_split = np.array_split(inp_lines, n_cpu) pool_results = p.map( partial(_do_attack, kw_freq_dict=kw_freq_dict), samples_split) out_lines = list( np.concatenate([results[0] for results in pool_results])) out_lines = [ out_lines[indices_map[idx]] for idx in range(len(out_lines)) ] local_scores = list( np.concatenate([results[1] for results in pool_results])) else: out_lines, local_scores = _do_attack(inp_lines, kw_freq_dict) print(sum(local_scores) / len(local_scores)) print('Time:', time.time() - time0) try: target = json.dumps({'text': out_lines}, ensure_ascii=False) with open(out_path, 'w', encoding='utf-8') as f: f.write(target) except: from materials.preprocessing_module import preprocess_text out_lines = [preprocess_text(line) for line in out_lines] target = json.dumps({'text': out_lines}, ensure_ascii=False) with open(out_path, 'w', encoding='utf-8') as f: f.write(target)
from attackers import is_alpha if __name__ == '__main__': model_path = '../data/materials/mini.ftz' inference_model = FastTextInferenceModel(model_path) bert_tokenizer = BertTokenizer.from_pretrained('../data/chinese_vocab.txt', do_lower_case=True) tokenizer = lambda x: bert_tokenizer.basic_tokenizer.tokenize(x) # tokenizer = lambda x: list(jieba.cut(x)) # 用于fasttext obscenities = set() with open('../data/obscenities.txt', encoding='utf-8') as f: for line in f: content = line.strip() content = preprocess_text(content) obscenities.add(content) obscenities = list(obscenities) vec_emb_path = '../data/materials/zh.300.vec.gz' fasttext_model_path = '../data/materials/mini.ftz' fasttext_model = FastTextInferenceModel(fasttext_model_path) performance_evaluator = PerformanceEvaluator( vec_emb_path, defence_model=fasttext_model) # 模拟远程防御模型,找到强力攻击样本 target_chars_transform_dict = json.load( open('bert_tokenizer_top25%.raw_trans_dict.json', encoding='utf-8')) ranked_transform_dict = OrderedDict() for t in target_chars_transform_dict: ranked_transform_dict[t] = { 'scores': target_chars_transform_dict[t]['scores'],