Example #1
 def _append_transformed_tokens(self, historical_taa_set, candidate_taas,
                                transformed_tokens):
     if not transformed_tokens:
         return
     preprocessed_transform_text = preprocess_text(
         ''.join(transformed_tokens))
     if preprocessed_transform_text in historical_taa_set:
         return
     if preprocessed_transform_text not in candidate_taas:
         candidate_taas[preprocessed_transform_text] = transformed_tokens
         historical_taa_set.add(preprocessed_transform_text)
 def __init__(self, chinese_chars_file, N=5):
     """
     Pronunciation transform: replace the given char with other characters
     that share its pronunciation.
     """
     super().__init__()
     self.lazy_pinyin_dict = defaultdict(list)
     self.pinyin_dict = defaultdict(list)
     with open(chinese_chars_file, encoding='utf-8') as f:
         processed_char_set = set()
         for char in f:
             char = preprocess_text(char.strip())
             if char in processed_char_set:
                 continue
             processed_char_set.add(char)
             py = pinyin(char)
             if len(py) > 0:
                 self.pinyin_dict[py[0][-1]].append(char)
             lazy_py = lazy_pinyin(char)
             if len(lazy_py) > 0:
                 self.lazy_pinyin_dict[lazy_py[-1]].append(char)
     self.N = N
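A minimal sketch, not part of the original snippet, assuming the pypinyin package that provides the pinyin/lazy_pinyin calls above; it shows the return shapes that explain the py[0][-1] and lazy_py[-1] indexing:

from pypinyin import pinyin, lazy_pinyin

print(pinyin('马'))       # [['mǎ']]  -> one inner list per character, tone-marked reading
print(lazy_pinyin('马'))  # ['ma']    -> flat list of toneless readings

# So pinyin_dict is keyed by tone-marked syllables ('mǎ') and lazy_pinyin_dict by
# toneless syllables ('ma'), giving a strict and a loose homophone lookup respectively.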
Example #3
    def attack(self, raw_texts, rounds=5, topK=5):
        print('Round:', rounds, 'TopK:', topK)
        local_scores = []
        transformed_texts = []
        for raw_text in tqdm(raw_texts):
            best_score = 0.0
            raw_tokens = self.tokenizer(raw_text)
            best_transformed_text = raw_text
            best_transformed_tokens = raw_tokens

            preprocessed_raw_text = preprocess_text(''.join(raw_tokens))
            historical_taa_set = {preprocessed_raw_text}
            candidate_taas = {preprocessed_raw_text: raw_tokens}

            ##############################################################
            ### Global transform: replace the entire sentence, then use these samples as seeds
            ##############################################################
            ## 1. Brute-force whole-sentence replacement
            for _ in range(3):  # 3 vs. 5 iterations makes little difference, only slightly slower
                self._append_transformed_tokens(
                    historical_taa_set, candidate_taas,
                    self.homonymic_transform.global_transform(
                        raw_tokens))  # replace all of the abusive keywords

            ## 2. Random whole-sentence replacement
            indices_probs = [
                self.transform_dict[token]['scores']
                if token in self.transform_dict else 0.0
                for token in raw_tokens
            ]
            indices_probs_sum = 0
            valid_cnt = 0
            for prob in indices_probs:
                indices_probs_sum += prob
                valid_cnt += int(prob > 0)
            if indices_probs_sum > 0:
                indices_probs = [
                    prob / indices_probs_sum for prob in indices_probs
                ]
                for round in range(1):  # increasing this round count does not actually help
                    for i in range(1, valid_cnt + 1):
                        indices = np.random.choice(len(raw_tokens),
                                                   i,
                                                   replace=False,
                                                   p=indices_probs)
                        new_tokens = raw_tokens[:]
                        for idx in indices:
                            target_token = new_tokens[idx]
                            tsf_tokens = self.transform_dict[target_token][
                                'transform_tokens']
                            tsf_token_probs = self.transform_dict[
                                target_token]['transform_probs']
                            tsf_idx = np.random.choice(len(tsf_token_probs),
                                                       1,
                                                       p=tsf_token_probs)[0]
                            new_tokens[idx] = tsf_tokens[tsf_idx]
                        self._append_transformed_tokens(
                            historical_taa_set, candidate_taas, new_tokens)

                # # Pick the topK strongest attack samples for the next round of iteration
                # cur_transformed_texts = []
                # cur_transformed_tokens = []
                # for text in candidate_taas:
                #   cur_transformed_texts.append(text)
                #   cur_transformed_tokens.append(candidate_taas[text])
                # ref_texts = [raw_text] * len(cur_transformed_texts)
                # soft_scores, hard_scores = self.performance_evaluator.calc_final_score(ref_texts, cur_transformed_texts,
                #                                                                        show_details=False)
                # sorted_eval_scores = sorted(enumerate(soft_scores), key=lambda d: d[1], reverse=True)[:topK]
                # if sorted_eval_scores[0][1] > best_score:
                #   best_score = sorted_eval_scores[0][1]
                #   best_transformed_text = cur_transformed_texts[sorted_eval_scores[0][0]]
                #   best_transformed_tokens = cur_transformed_tokens[sorted_eval_scores[0][0]]
                #   candidate_taas = {}
                # else:
                #   candidate_taas = {best_transformed_text: best_transformed_tokens}
                # for idx, score in sorted_eval_scores:
                #   candidate_taas[cur_transformed_texts[idx]] = cur_transformed_tokens[idx]

            for round in range(rounds):
                cur_tokens_list = [
                    candidate_taas[text] for text in candidate_taas
                ]
                for tokens_idx, tokens in enumerate(cur_tokens_list):
                    if len(tokens) == 0:
                        continue
                    ## Genetic attack
                    for other_tokens_idx, other_tokens in enumerate(
                            cur_tokens_list):
                        if other_tokens_idx == tokens_idx or len(
                                tokens) != len(other_tokens):
                            continue

                        new_tokens = tokens[:]
                        target_token_indices = np.random.choice(
                            len(other_tokens),
                            len(other_tokens) // 2,
                            replace=False)
                        for idx in target_token_indices:
                            if idx < len(new_tokens):
                                new_tokens[idx] = other_tokens[idx]
                        self._append_transformed_tokens(
                            historical_taa_set, candidate_taas, new_tokens)
                    idx = random.randint(0, len(tokens) - 1)  # FIXME: replace the random attack

                    if is_alpha(tokens[idx]) and len(tokens[idx]) >= 4:
                        self._append_transformed_tokens(
                            historical_taa_set, candidate_taas,
                            self.char_swap_transform(tokens, idx))

                    self._append_transformed_tokens(
                        historical_taa_set, candidate_taas,
                        self.add_transform(tokens, idx))
                    # self._append_transformed_tokens(historical_taa_set, candidate_taas, self.token_drop_transform(tokens, idx))
                    self._append_transformed_tokens(
                        historical_taa_set, candidate_taas,
                        self.radical_transform(tokens,
                                               idx))  # beware characters that are not left-right structured, e.g. 死, 司

                    self._append_transformed_tokens(
                        historical_taa_set, candidate_taas,
                        self.hxw_transform(tokens, idx))
                    self._append_transformed_tokens(
                        historical_taa_set, candidate_taas,
                        self.hxw_radical_transform(tokens, idx))
                    self._append_transformed_tokens(
                        historical_taa_set, candidate_taas,
                        self.radical_chardrop_transform(tokens, idx))
                    self._append_transformed_tokens(
                        historical_taa_set, candidate_taas,
                        self.hxw_radical_chardroptransform(tokens, idx))

                    # self._append_transformed_tokens(historical_taa_set, candidate_taas,
                    #                                 self.token_swap_transform(tokens, idx))  # word-level swap performs poorly
                    self._append_transformed_tokens(
                        historical_taa_set, candidate_taas,
                        self.phonetic_char_swap_transform(tokens, idx))

                    # # ## FIXME: the block below is a small step in the workflow, a special case
                    # candidates_list = self.pronunciation_transform(tokens, idx, N=5)
                    # transformed_tokens = tokens[:idx]
                    # new_token_chars = []
                    # for raw_char, candidates in zip(tokens[idx], candidates_list):
                    #   for candidate in candidates:
                    #     if candidate != raw_char:
                    #       new_token_chars.append(candidate)
                    #       break
                    # if len(new_token_chars) > 0:
                    #   new_token = ''.join(new_token_chars)
                    # else:
                    #   new_token = ''
                    # transformed_tokens.append(new_token)
                    # transformed_tokens += tokens[idx + 1:]
                    # self._append_transformed_tokens(historical_taa_set, candidate_taas, transformed_tokens)
                    self._append_transformed_tokens(
                        historical_taa_set, candidate_taas,
                        rule_based_transform(tokens, self.transform_dict))

                # Pick the topK strongest attack samples for the next round of iteration
                cur_transformed_texts = []
                cur_transformed_tokens = []
                for text in candidate_taas:
                    cur_transformed_texts.append(text)
                    cur_transformed_tokens.append(candidate_taas[text])
                ref_texts = [raw_text] * len(cur_transformed_texts)
                soft_scores, hard_scores = self.performance_evaluator.calc_final_score(
                    ref_texts, cur_transformed_texts, show_details=False)
                sorted_eval_scores = sorted(enumerate(soft_scores),
                                            key=lambda d: d[1],
                                            reverse=True)[:topK]
                if sorted_eval_scores[0][1] > best_score:
                    best_score = sorted_eval_scores[0][1]
                    best_transformed_text = cur_transformed_texts[
                        sorted_eval_scores[0][0]]
                    best_transformed_tokens = cur_transformed_tokens[
                        sorted_eval_scores[0][0]]
                    candidate_taas = {}
                else:
                    candidate_taas = {
                        best_transformed_text: best_transformed_tokens
                    }
                for idx, score in sorted_eval_scores:
                    candidate_taas[cur_transformed_texts[
                        idx]] = cur_transformed_tokens[idx]

            transformed_texts.append(best_transformed_text)
            local_scores.append(best_score)

        return transformed_texts, local_scores
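A standalone sketch of the crossover step labelled "Genetic attack" above; the token lists are neutral placeholders, everything else mirrors the loop body: copy one candidate and overwrite half of its positions, chosen at random, with tokens from another candidate of the same length.

import numpy as np

tokens = ['t1', 't2', 't3', 't4', 't5', 't6']        # candidate A (placeholder tokens)
other_tokens = ['s1', 's2', 's3', 's4', 's5', 's6']  # candidate B, same length as A

new_tokens = tokens[:]                                # never mutate the parent candidate
target_token_indices = np.random.choice(len(other_tokens),
                                        len(other_tokens) // 2,
                                        replace=False)
for idx in target_token_indices:
    new_tokens[idx] = other_tokens[idx]               # splice B's token into A's copy

print(new_tokens)  # e.g. ['t1', 's2', 't3', 's4', 't5', 's6']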
Example #4
def generate_taa_samples(self, raw_texts, group_ids, rounds=5, topK=5):
    transformed_texts = []
    new_group_ids = []
    for raw_text, group_id in tqdm(zip(raw_texts, group_ids),
                                   total=len(raw_texts)):
        if isinstance(group_id, int):
            is_obs = (group_id == 1)
        else:
            is_obs = group_id.startswith('obs')

        texts_to_add = set()
        raw_tokens = self.tokenizer(raw_text)

        preprocessed_raw_text = preprocess_text(''.join(raw_tokens))
        historical_taa_set = {preprocessed_raw_text}
        candidate_taas = {preprocessed_raw_text: raw_tokens}
        for round in range(rounds):
            cur_tokens_list = [candidate_taas[text] for text in candidate_taas]
            for tokens_idx, tokens in enumerate(cur_tokens_list):
                if len(tokens) == 0:
                    continue
                ## Genetic attack
                for other_tokens_idx, other_tokens in enumerate(
                        cur_tokens_list):
                    if other_tokens_idx == tokens_idx or len(tokens) != len(
                            other_tokens):
                        continue

                    new_tokens = tokens[:]
                    target_token_indices = np.random.choice(
                        len(other_tokens),
                        len(other_tokens) // 2,
                        replace=False)
                    for idx in target_token_indices:
                        if idx < len(new_tokens):
                            new_tokens[idx] = other_tokens[idx]
                    self._append_transformed_tokens(historical_taa_set,
                                                    candidate_taas, new_tokens)

                idx = random.randint(0, len(tokens) - 1)  # FIXME: replace the random attack

                if is_alpha(tokens[idx]) and len(tokens[idx]) >= 4:
                    self._append_transformed_tokens(
                        historical_taa_set, candidate_taas,
                        self.char_swap_transform(tokens, idx))

                self._append_transformed_tokens(
                    historical_taa_set, candidate_taas,
                    self.add_transform(tokens, idx))
                self._append_transformed_tokens(
                    historical_taa_set, candidate_taas,
                    self.token_drop_transform(tokens, idx))
                self._append_transformed_tokens(
                    historical_taa_set, candidate_taas,
                    self.token_swap_transform(tokens, idx))  # word-level swap performs poorly

                self._append_transformed_tokens(
                    historical_taa_set, candidate_taas,
                    self.radical_transform(tokens,
                                           idx))  # beware characters that are not left-right structured, e.g. 死, 司

                self._append_transformed_tokens(
                    historical_taa_set, candidate_taas,
                    self.phonetic_char_swap_transform(tokens, idx))

                self._append_transformed_tokens(
                    historical_taa_set, candidate_taas,
                    self.hxw_transform(tokens, idx))

                # ## FIXME: the block below is a small step in the workflow, a special case (a standalone sketch of it follows this example)
                candidates_list = self.pronunciation_transform(tokens,
                                                               idx,
                                                               N=None)
                transformed_tokens = tokens[:idx]
                new_token_chars = []
                for raw_char, candidates in zip(tokens[idx], candidates_list):
                    for candidate in candidates:
                        if candidate != raw_char:
                            new_token_chars.append(candidate)
                            break
                if len(new_token_chars) > 0:
                    new_token = ''.join(new_token_chars)
                else:
                    new_token = ''
                transformed_tokens.append(new_token)
                transformed_tokens += tokens[idx + 1:]
                self._append_transformed_tokens(historical_taa_set,
                                                candidate_taas,
                                                transformed_tokens)

            # Pick the topK strongest attack samples for the next round of iteration
            cur_transformed_texts = []
            cur_transformed_tokens = []
            for text in candidate_taas:
                cur_transformed_texts.append(text)
                cur_transformed_tokens.append(candidate_taas[text])
            ref_texts = [raw_text] * len(cur_transformed_texts)
            soft_scores, hard_scores = self.performance_evaluator.calc_final_score(
                ref_texts,
                cur_transformed_texts,
                show_details=False,
                is_obs=is_obs)
            sorted_eval_scores = sorted(enumerate(soft_scores),
                                        key=lambda d: d[1],
                                        reverse=True)[:topK]
            candidate_taas = {}
            for idx, score in sorted_eval_scores:
                candidate_taas[
                    cur_transformed_texts[idx]] = cur_transformed_tokens[idx]
            # add the top-scoring text each round; after the last round, all of them are added
            texts_to_add.add(cur_transformed_texts[sorted_eval_scores[0][0]])
        texts_to_add |= set(cur_transformed_texts)
        transformed_texts.extend(list(texts_to_add))
        new_group_ids.extend([group_id] * len(texts_to_add))

    return transformed_texts, new_group_ids
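A standalone rework of the pronunciation-substitution block inside generate_taa_samples above; the candidate lists are hard-coded placeholders standing in for pronunciation_transform output: for each character of the chosen token, keep the first candidate that differs from the original character.

tokens = ['你好', '世界']
idx = 0
# placeholder homophone candidates, one list per character of tokens[idx]
candidates_list = [['妳', '你'], ['号', '好']]

transformed_tokens = tokens[:idx]
new_token_chars = []
for raw_char, candidates in zip(tokens[idx], candidates_list):
    for candidate in candidates:
        if candidate != raw_char:
            new_token_chars.append(candidate)  # first homophone that differs
            break
new_token = ''.join(new_token_chars) if new_token_chars else ''
transformed_tokens.append(new_token)
transformed_tokens += tokens[idx + 1:]
print(transformed_tokens)  # ['妳号', '世界']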
Example #5
    def attack(self,
               raw_texts,
               rounds=5,
               topK=5,
               debug=False,
               kw_freq_thres=20.0):
        print('Round:', rounds, 'TopK:', topK)
        local_scores = []
        transformed_texts = []
        for i_text, raw_text in tqdm(enumerate(raw_texts),
                                     total=len(raw_texts)):
            best_score = 0.0
            raw_tokens = self.tokenizer(raw_text)
            kw_freqs = []
            for token in raw_tokens:
                if token not in self.kw_freq_dict:
                    self.kw_freq_dict[token] = 0
                self.kw_freq_dict[token] += 5
                kw_freqs.append(self.kw_freq_dict[token])
            # copy the global dict; using the local copy for high-frequency queries
            # avoids the huge synchronization cost of cross-process syncing
            self.local_kw_freq_dict = self.kw_freq_dict.copy()
            mean_freq = np.mean(kw_freqs)
            best_transformed_text = raw_text
            best_transformed_tokens = raw_tokens

            ## TODO: if the tokens contain too few previously-seen words (mean frequency below the threshold), switch to the kw idf mode
            # if i_text <= kw_idf_cnt:
            if mean_freq < kw_freq_thres:
                kw_scores = self.kw_identification(raw_tokens, len(raw_tokens))
                kw_scores = [score for _, score in kw_scores]

            preprocessed_raw_text = preprocess_text(''.join(raw_tokens))
            historical_taas = {preprocessed_raw_text}
            candidate_taas = {}
            ##############################################################
            ### Global transform: replace the entire sentence, then use these samples as seeds
            ##############################################################
            # replace all of the abusive keywords
            for transform in self.global_transforms:
                for i in range(topK):  # increasing this helps a little
                    self._append_transformed_tokens(
                        historical_taas, candidate_taas,
                        transform.global_transform(raw_tokens))
            if len(candidate_taas) == 0:
                candidate_taas = {preprocessed_raw_text: raw_tokens}

            cur_rounds = rounds  # number of rounds for the current text, adjusted by its length
            if len(raw_tokens) < 50:  # 30 is fine, 50 is uncertain
                cur_rounds = int(cur_rounds *
                                 (1.5 - 0.1 * len(raw_tokens) // 10))
            for round in range(1, cur_rounds + 1):
                cur_tokens_list = [
                    candidate_taas[text] for text in candidate_taas
                ]
                for tokens_idx, tokens in enumerate(cur_tokens_list):
                    if len(tokens) == 0:
                        continue
                    # # Brute-force multi-point crossover genetic attack: looks worse to the eye, but stronger online
                    # for other_tokens_idx, other_tokens in enumerate(cur_tokens_list):
                    #   if other_tokens_idx == tokens_idx or len(tokens) != len(other_tokens):
                    #     continue
                    #
                    #   for ratio in [2]:
                    #     if len(tokens) < ratio:
                    #       continue
                    #     new_tokens1 = tokens[:]
                    #     new_tokens2 = other_tokens[:]  # the for loop already visits both (i,j) and (j,i), but doing it again adds diversity
                    #     target_token_indices = np.random.choice(len(other_tokens), len(other_tokens) // ratio, replace=False)
                    #     for idx in target_token_indices:
                    #       if idx < len(new_tokens1):
                    #         new_tokens1[idx] = other_tokens[idx]
                    #       if idx > len(new_tokens2):
                    #         new_tokens2[idx] = tokens[idx]
                    #     self._append_transformed_tokens(historical_taas, candidate_taas, new_tokens1)
                    #     self._append_transformed_tokens(historical_taas, candidate_taas, new_tokens2)
                    pass

                    # ## Crossover genetic attack: strong offline and to the eye, but very weak online
                    # for other_tokens_idx, other_tokens in enumerate(cur_tokens_list):
                    #   if other_tokens_idx == tokens_idx:
                    #     continue
                    #
                    #   try:
                    #     tgt_idx = random.randint(3, min(len(tokens), len(other_tokens)) - 3)  # do not cut within the first/last few positions
                    #     new_tokens1 = tokens[:tgt_idx] + other_tokens[tgt_idx:]
                    #     new_tokens2 = other_tokens[:tgt_idx] + tokens[tgt_idx:]
                    #     self._append_transformed_tokens(historical_taas, candidate_taas, new_tokens1)
                    #     self._append_transformed_tokens(historical_taas, candidate_taas, new_tokens2)
                    #   except:
                    #     pass
                    pass
                    idx_probs = None
                    if round % 2:
                        try:
                            if mean_freq < kw_freq_thres:
                                freqs = kw_scores  # add/drop transforms may shift the indices, but ignore that for now
                                freqs = freqs[:len(tokens)]
                                freqs += [0] * (len(tokens) - len(freqs))
                                freqs = np.array(freqs)
                                freqs = freqs - freqs.min() + 0.01
                            else:
                                # FIXME: could switch to local_kw here for speed if necessary
                                freqs = np.array([
                                    self.kw_freq_dict[token]
                                    if token in self.kw_freq_dict else 1
                                    for token in tokens
                                ])
                            idx_probs = freqs / freqs.sum()
                        except:
                            pass
                    idx = np.random.choice(list(range(len(tokens))),
                                           1,
                                           p=idx_probs)[0]  # targeted attack on keywords
                    indices = np.random.choice(list(range(len(tokens))),
                                               min(3, len(tokens)),
                                               p=idx_probs)  # batch replacement

                    ## Start single-point replacement
                    if is_alpha(tokens[idx]) and len(tokens[idx]) >= 4:
                        for transform in self.alpha_transforms:
                            self._append_transformed_tokens(
                                historical_taas, candidate_taas,
                                transform(tokens, idx))

                    # if len(tokens[idx]) > 1:
                    #   ## For non-English tokens that have already been transformed, continue directly to avoid hurting readability.
                    #   # (they may have been split into radicals, then radical -> something else, or te -> t恶, and so on)
                    #   # little impact on speed, which means such samples are not that common
                    #   continue

                    for transform in self.multi_rounds_transforms:
                        for _ in range(3):
                            self._append_transformed_tokens(
                                historical_taas, candidate_taas,
                                transform(tokens, idx))

                    for transform in self.random_transforms:
                        self._append_transformed_tokens(
                            historical_taas, candidate_taas,
                            transform(tokens, idx))

                    for transform in self.fixed_transforms:
                        self._append_transformed_tokens(
                            historical_taas, candidate_taas,
                            transform(tokens, idx))

                    ## Start batch replacement, mainly serving methods such as pinyin/add that do not badly hurt readability, to offset their disadvantage on the Jaccard metric
                    indices = sorted(indices, reverse=True)  # descending order, needed by the add transform
                    for transform in self.multi_ptr_transforms:
                        self._append_transformed_tokens(
                            historical_taas, candidate_taas,
                            transform.multi_ptr_trans(tokens, indices))

                # Pick the topK strongest attack samples for the next round of iteration
                cur_transformed_texts = []
                cur_transformed_tokens = []
                for text in candidate_taas:
                    cur_transformed_texts.append(text)
                    cur_transformed_tokens.append(candidate_taas[text])
                ref_texts = [raw_text] * len(cur_transformed_texts)
                soft_scores, hard_scores = self.performance_evaluator.calc_final_score(
                    ref_texts, cur_transformed_texts, show_details=False)

                ## Frequency-weight the final score; this strategy counters the online automatic defense mechanism
                freqs = np.array([
                    sum([
                        self.local_kw_freq_dict[token]
                        if token in self.local_kw_freq_dict else 1
                        for token in tokens
                    ]) for tokens in cur_transformed_tokens
                ])
                freq_weights = (freqs - freqs.min()) / (freqs.max() -
                                                        freqs.min())
                freq_weights = 1.0 - 0.2 * freq_weights
                soft_scores *= freq_weights
                sorted_eval_scores = sorted(enumerate(soft_scores),
                                            key=lambda d: d[1],
                                            reverse=True)
                if sorted_eval_scores[0][1] > best_score:
                    best_score = sorted_eval_scores[0][1]
                    best_transformed_text = cur_transformed_texts[
                        sorted_eval_scores[0][0]]
                    best_transformed_tokens = cur_transformed_tokens[
                        sorted_eval_scores[0][0]]
                    # best_transformed_tokens = self.tokenizer(best_transformed_text)  # re-tokenizing here seems to make no difference, and does not affect speed
                    candidate_taas = {}
                else:
                    candidate_taas = {
                        best_transformed_text: best_transformed_tokens
                    }
                for idx, score in sorted_eval_scores[:topK]:
                    candidate_taas[cur_transformed_texts[
                        idx]] = cur_transformed_tokens[idx]
                    # candidate_taas[cur_transformed_texts[idx]] = self.tokenizer(cur_transformed_texts[idx])

                # Then randomly pick 2 extra weaker samples into the next round to keep sample diversity; completely useless online
                # try:
                #   extra_cnt = 2
                #   probs = np.array([score for idx, score in sorted_eval_scores[topK:]])  # pick from the samples outside the topK
                #   probs = probs / probs.sum()
                #   rnd_sample_indices = np.random.choice(list(range(topK, len(sorted_eval_scores))), extra_cnt, replace=False,
                #                                         p=probs)
                #   for idx in rnd_sample_indices:
                #     idx = sorted_eval_scores[idx][0]
                #     candidate_taas[cur_transformed_texts[idx]] = cur_transformed_tokens[idx]
                #     # candidate_taas[cur_transformed_texts[idx]] = self.tokenizer(cur_transformed_texts[idx])
                # except:
                #   pass
                pass
            for token in best_transformed_tokens:
                if token not in self.kw_freq_dict:
                    self.kw_freq_dict[token] = 0
                self.kw_freq_dict[token] += 2

            transformed_texts.append(best_transformed_text)
            local_scores.append(best_score)

            if debug:
                ## Compute each transform's contribution
                for transform in self.transforms:
                    tokens_list = transform.transformed_tokens
                    if not tokens_list:
                        continue
                    cur_transformed_texts = list(
                        set([
                            preprocess_text(''.join(tokens))
                            for tokens in tokens_list
                        ]))
                    ref_texts = [raw_text] * len(cur_transformed_texts)
                    soft_scores, hard_scores = self.performance_evaluator.calc_final_score(
                        ref_texts, cur_transformed_texts, show_details=False)
                    transform.mean_scores.append(np.mean(soft_scores))
                    transform.max_scores.append(np.max(soft_scores))
                    transform.clear()
        if debug:
            print('-' * 80)
            print('Mean of Mean scores:')
            print('-' * 80)
            score_records = []
            for transform in self.transforms:
                scores = transform.mean_scores
                score = 0
                if scores:
                    score = np.mean(scores)
                score_records.append((transform, score), )
            score_records = sorted(score_records,
                                   key=lambda d: d[1],
                                   reverse=True)
            for k, v in score_records:
                print(k, v)

            print('-' * 80)
            print('Mean of Max scores:')
            print('-' * 80)
            score_records = []
            for transform in self.transforms:
                scores = transform.max_scores
                score = 0
                if scores:
                    score = np.mean(scores)
                score_records.append((transform, score), )
            score_records = sorted(score_records,
                                   key=lambda d: d[1],
                                   reverse=True)
            for k, v in score_records:
                print(k, v)

            print('-' * 80)
            print('Max of Max scores:')
            print('-' * 80)
            score_records = []
            for transform in self.transforms:
                scores = transform.max_scores
                score = 0
                if scores:
                    score = np.max(scores)
                score_records.append((transform, score), )
            score_records = sorted(score_records,
                                   key=lambda d: d[1],
                                   reverse=True)
            for k, v in score_records:
                print(k, v)

        # print('-' * 80)
        # for token, freq in sorted(self.kw_freq_dict.items(), key=lambda d: d[1], reverse=True)[:50]:
        #   print(token, freq)
        # print('Len freq dict:', len(self.kw_freq_dict))
        # print('-' * 80)
        return transformed_texts, local_scores
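A standalone sketch of the frequency-weighted re-scoring done at the end of each round above; the frequency table and scores are toy placeholders, and a small guard is added that the original lacks (the min-max normalization divides by zero when every candidate has the same frequency sum).

import numpy as np

local_kw_freq_dict = {'bad': 40, 'word': 25}                 # toy keyword-frequency table
cur_transformed_tokens = [['bad', 'word'], ['b@d', 'w0rd'], ['bad', 'w0rd']]
soft_scores = np.array([0.90, 0.70, 0.85])                   # toy evaluator scores

freqs = np.array([
    sum(local_kw_freq_dict.get(token, 1) for token in tokens)
    for tokens in cur_transformed_tokens
])                                                           # -> [65, 2, 41]
freq_range = max(freqs.max() - freqs.min(), 1e-8)            # guard against identical sums
freq_weights = 1.0 - 0.2 * (freqs - freqs.min()) / freq_range
weighted = soft_scores * freq_weights                        # frequently-seen tokens get penalized
print(weighted.round(3))                                     # e.g. [0.72  0.7   0.745]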
Example #6
import fasttext
import jieba
from materials.preprocessing_module import preprocess_text

model_path = '../data/materials/mini.ftz'
model = fasttext.load_model(model_path)

is_obscenity_dict = {'__label__0': 0,
                     '__label__1': 1}

# file = '../data/obscenities.txt'
file = '../data/corpus.txt'

wf = open('corpus_output.txt', 'w', encoding='utf-8')
with open(file, encoding='utf-8') as f:
  for line in f:
    text = line.strip()
    text = preprocess_text(text)
    text = ' '.join(jieba.cut(text))
    (lbl,), (score,) = model.predict(text)
    if not is_obscenity_dict[lbl]:
      score = 1 - score
    wf.write('%s\t%.6f\n' % (text, score))
wf.close()
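A small follow-up sketch, assuming one wants to consume the file written above; each line is "<space-joined tokens>\t<score>", so it can be read back and ranked by the obscenity score:

scored = []
with open('corpus_output.txt', encoding='utf-8') as f:
  for line in f:
    text, score = line.rstrip('\n').rsplit('\t', 1)
    scored.append((text, float(score)))

scored.sort(key=lambda d: d[1], reverse=True)  # highest obscenity score first
for text, score in scored[:10]:
  print('%.6f\t%s' % (score, text))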
Example #7
 def _preprocess(self, texts):
     texts = [preprocess_text(text) for text in texts]
     return texts
Example #8
            for cur_idx, (raw_idx, line) in enumerate(inp_lines)
        }
        inp_lines = [line for idx, line in inp_lines]
        with mp.Pool(processes=n_cpu) as p:
            samples_split = np.array_split(inp_lines, n_cpu)
            pool_results = p.map(
                partial(_do_attack, kw_freq_dict=kw_freq_dict), samples_split)

        out_lines = list(
            np.concatenate([results[0] for results in pool_results]))
        out_lines = [
            out_lines[indices_map[idx]] for idx in range(len(out_lines))
        ]
        local_scores = list(
            np.concatenate([results[1] for results in pool_results]))
    else:
        out_lines, local_scores = _do_attack(inp_lines, kw_freq_dict)
    print(sum(local_scores) / len(local_scores))
    print('Time:', time.time() - time0)
    try:
        target = json.dumps({'text': out_lines}, ensure_ascii=False)
        with open(out_path, 'w', encoding='utf-8') as f:
            f.write(target)
    except:
        from materials.preprocessing_module import preprocess_text

        out_lines = [preprocess_text(line) for line in out_lines]
        target = json.dumps({'text': out_lines}, ensure_ascii=False)
        with open(out_path, 'w', encoding='utf-8') as f:
            f.write(target)
from attackers import is_alpha

if __name__ == '__main__':
    model_path = '../data/materials/mini.ftz'
    inference_model = FastTextInferenceModel(model_path)

    bert_tokenizer = BertTokenizer.from_pretrained('../data/chinese_vocab.txt',
                                                   do_lower_case=True)
    tokenizer = lambda x: bert_tokenizer.basic_tokenizer.tokenize(x)
    # tokenizer = lambda x: list(jieba.cut(x))  # for fasttext

    obscenities = set()
    with open('../data/obscenities.txt', encoding='utf-8') as f:
        for line in f:
            content = line.strip()
            content = preprocess_text(content)
            obscenities.add(content)
    obscenities = list(obscenities)

    vec_emb_path = '../data/materials/zh.300.vec.gz'
    fasttext_model_path = '../data/materials/mini.ftz'
    fasttext_model = FastTextInferenceModel(fasttext_model_path)
    performance_evaluator = PerformanceEvaluator(
        vec_emb_path, defence_model=fasttext_model)  # simulate the remote defense model to find strong attack samples

    target_chars_transform_dict = json.load(
        open('bert_tokenizer_top25%.raw_trans_dict.json', encoding='utf-8'))
    ranked_transform_dict = OrderedDict()
    for t in target_chars_transform_dict:
        ranked_transform_dict[t] = {
            'scores': target_chars_transform_dict[t]['scores'],