Example no. 1
0
tag = 'LS'
SAVE_FOLDER = f'out/pso_related/{dataset_name}_{model_name}_{tag}_search/{SEED}'
my_file.create_folder(SAVE_FOLDER)

# init log file
log_file = open(my_file.real_path_of(SAVE_FOLDER, 'log.txt'), 'w')

# save parameters
log_file.write(f'SEED: {SEED}\n')
log_file.write(f'Test Size: {TEST_SIZE}\n')
log_file.flush()

# CURRENT_PATH = 'data/pso_raw/IMDB_used_data'
VOCAB_SIZE = 50000

dataset = my_file.load_pkl(dataset_path)
word_candidate = my_file.load_pkl_in_repo(word_candidates_path)
test_pos_tags = my_file.load_pkl_in_repo(pos_tags_path)

# Prevent returning 0 as the most similar word, because 0 is not part of the dictionary
max_len = 250
train_x = pad_sequences(dataset.train_seqs2, maxlen=max_len, padding='post')
train_y = np.array(dataset.train_y)
test_x = pad_sequences(dataset.test_seqs2, maxlen=max_len, padding='post')
test_y = np.array(dataset.test_y)

model = models.Model(dataset, model_path).cuda()
attacker = LSAttack(model, word_candidate)

all_test_num = len(dataset.test_y)
print(f'Total have {all_test_num} test examples')
Example no. 2
SAVE_FOLDER = f'out/pso_related/{dataset_name}_{model_name}_{tag}_search/{SEED}'
my_file.create_folder(SAVE_FOLDER)

# init log file
log_file = open(my_file.real_path_of(SAVE_FOLDER, 'log.txt'), 'w')

# save parameters
log_file.write(f'SEED: {SEED}\n')
log_file.write(f'Test Size: {TEST_SIZE}\n')
log_file.write(f'Pop size: {pop_size}\n')
log_file.write(f'Max Iteration: {max_iter}\n')
log_file.flush()

# CURRENT_PATH = 'data/pso_raw/IMDB_used_data'
VOCAB_SIZE = 50000
dataset = my_file.load_pkl(dataset_path)
word_candidate = my_file.load_pkl(word_candidates_path)
test_pos_tags = my_file.load_pkl(pos_tags_path)

# Prevent returning 0 as the most similar word, because 0 is not part of the dictionary
max_len = 250
train_x = pad_sequences(dataset.train_seqs2, maxlen=max_len, padding='post')
train_y = np.array(dataset.train_y)
test_x = pad_sequences(dataset.test_seqs2, maxlen=max_len, padding='post')
test_y = np.array(dataset.test_y)

model = models.Model(dataset, model_path).cuda()
pso_attacker = PSOAttack(model, word_candidate, dataset,
                         max_iters=max_iter,
                         pop_size=pop_size)
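
# --- Sketch (not in the source excerpt): one plausible way to drive the attacker
# --- over the test set. The attack() method name, its (x_orig, target, pos_tags)
# --- signature, and the binary label flip are assumptions, not confirmed here.
test_idx_list = np.random.choice(len(test_y), TEST_SIZE, replace=False)
for test_idx in test_idx_list:
    x_orig = test_x[test_idx]
    target = 1 - test_y[test_idx]  # assumed binary classification task
    x_adv = pso_attacker.attack(x_orig, target, test_pos_tags[test_idx])
    if x_adv is not None:
        log_file.write(f'attack success on test example {test_idx}\n')
        log_file.flush()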
Example no. 3
0
import sys
sys.path.append(sys.path[0] + '/../../')
import os

from tqdm import tqdm
from utils import my_file

# ==================== paras ========================
dataset_path = '/home/workspace/nlp_attack/data/pso_raw/SST_used_data/aux_files/dataset_13837.pkl'

# set the result folder path
folder_path = '/home/workspace/nlp_attack_ls_final/out/pso_related/SST2_BERT_LS_search/3333'

# ============ read dataset ========================

dataset = my_file.load_pkl(dataset_path)
all_test_num = len(dataset.test_y)

# ============ read attack result ================
success_test_idx_list, success_target_list, success_eg_list = \
    my_file.load_pkl(os.path.join(folder_path, 'success_all.pkl'))

# open save file
orig_plain_text_filename = 'orig.txt'
adv_plain_text_filename = 'adv.txt'

orig_txtfile = open(os.path.join(folder_path, orig_plain_text_filename), 'w')
adv_txtfile = open(os.path.join(folder_path, adv_plain_text_filename), 'w')

# for only attack success
for i, success_test_idx in tqdm(enumerate(success_test_idx_list)):
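    # --- Sketch (the source excerpt is truncated here): write the original and
    # --- adversarial sentences as plain text. dataset.inv_dict (index -> word)
    # --- is an assumed attribute name.
    orig_seq = dataset.test_seqs2[success_test_idx]
    adv_seq = success_eg_list[i]
    orig_txtfile.write(' '.join(dataset.inv_dict[w] for w in orig_seq if w != 0) + '\n')
    adv_txtfile.write(' '.join(dataset.inv_dict[w] for w in adv_seq if w != 0) + '\n')

orig_txtfile.close()
adv_txtfile.close()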
Example no. 4
0
model_name = 'BERT'
tag = 'PSO'
SAVE_FOLDER = f'out/pso_related/{dataset_name}_{model_name}_{tag}_search/{SEED}'
my_file.create_folder(SAVE_FOLDER)

# init log file
log_file = open(my_file.real_path_of(SAVE_FOLDER, 'log.txt'), 'w')

# save parameters
log_file.write(f'SEED: {SEED}\n')
log_file.write(f'Test Size: {TEST_SIZE}\n')
log_file.write(f'Pop size: {pop_size}\n')
log_file.write(f'Max Iteration: {max_iter}\n')
log_file.flush()

tokenizer = my_file.load_pkl(
    os.path.join(SNLI_data_folder_path, 'nli_tokenizer.pkl'))
word_candidate = my_file.load_pkl(
    os.path.join(SNLI_data_folder_path, 'word_candidates_sense.pkl'))
train, valid, test = my_file.load_pkl(
    os.path.join(SNLI_data_folder_path, 'all_seqs.pkl'))
test_pos_tags = my_file.load_pkl(
    os.path.join(SNLI_data_folder_path, 'pos_tags_test.pkl'))

# drop the first and last token of each sequence (presumably the sentence
# start/end markers added during preprocessing)
test_s1 = [t[1:-1] for t in test['s1']]
test_s2 = [t[1:-1] for t in test['s2']]

vocab = {w: i for (w, i) in tokenizer.word_index.items()}
inv_vocab = {i: w for (w, i) in vocab.items()}
model = Model(inv_vocab, os.path.join(SNLI_data_folder_path, 'BERTModel.pt'))

# NOTE: the excerpt is truncated mid-call here; the remaining arguments are an
# assumption, following the PSOAttack construction used in the earlier example.
adversary = PSOAttack(model, word_candidate,
                      max_iters=max_iter,
                      pop_size=pop_size)
Example no. 5
with open('./nli_tokenizer.pkl', 'rb') as fh:
    tokenizer = pickle.load(fh)

# vocab= {w:i for (w, i) in tokenizer.word_index.items()}
inv_vocab = {i:w for (w, i) in tokenizer.word_index.items()}
def reconstruct(sent):
    word_list = [inv_vocab[w] for w in sent if w != 0]
    return ' '.join(word_list)

# load dataset
with open('./nli_testing.pkl', 'rb') as fh:
    test = pickle.load(fh)

# load result

test_idx_list = my_file.load_pkl(os.path.join(result_folder, 'test_list.pkl'))
success_idx_list, success_target_list, success_x_list = my_file.load_pkl(os.path.join(result_folder, 'success.pkl'))
# self.long_fail_idx_list, self.long_fail_target_list, long_fail_x_list

orig_plain_text_filename = 'orig.txt'
adv_plain_text_filename = 'adv.txt'

orig_txtfile = open(os.path.join(result_folder, orig_plain_text_filename), 'w')
adv_txtfile = open(os.path.join(result_folder, adv_plain_text_filename), 'w')


for i, test_idx in enumerate(success_idx_list):
    # only read hypothesis
    orig_x1 = test[1][test_idx]
    orig_y = np.argmax(test[2][test_idx])
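    # --- Sketch (the source excerpt is truncated here): write the reconstructed
    # --- original and adversarial hypotheses to the two text files opened above.
    adv_x1 = success_x_list[i]
    orig_txtfile.write(reconstruct(orig_x1) + '\n')
    adv_txtfile.write(reconstruct(adv_x1) + '\n')

orig_txtfile.close()
adv_txtfile.close()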
Example no. 6
import sys
sys.path.append(sys.path[0] + '/../../')
import os

from tqdm import tqdm
from utils import my_file

# ======================== paras=========================================
result_folder = '/home/workspace/nlp_attack_ls_final/out/pso_related/SNLI_BiLSTM_LS_search/3333'
SNLI_DATA_PATH = '/home/workspace/nlp_attack/data/pso_raw/SNLI_used_data'

# ============ read dataset ========================
train, valid, test = my_file.load_pkl(
    os.path.join(SNLI_DATA_PATH, 'all_seqs.pkl'))
test_s1 = [t[1:-1] for t in test['s1']]
test_s2 = [t[1:-1] for t in test['s2']]

# read tokenizer
tokenizer = my_file.load_pkl(os.path.join(SNLI_DATA_PATH, 'nli_tokenizer.pkl'))
inv_vocab = {i: w for (w, i) in tokenizer.word_index.items()}


def reconstruct_text(x_):
    word_list = []
    for w_idx in x_:
        word_list.append(inv_vocab[w_idx])

    return " ".join(word_list)


# ============ read attack result ================
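# --- Sketch (the source excerpt ends here): load the saved attack results from
# --- result_folder, as in the earlier SNLI snippet; the pickle file name and
# --- the tuple layout are assumptions.
success_idx_list, success_target_list, success_x_list = \
    my_file.load_pkl(os.path.join(result_folder, 'success.pkl'))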