tag = 'LS'
SAVE_FOLDER = f'out/pso_related/{dataset_name}_{model_name}_{tag}_search/{SEED}'
my_file.create_folder(SAVE_FOLDER)

# init log file
log_file = open(my_file.real_path_of(SAVE_FOLDER, 'log.txt'), 'w')
# save parameters
log_file.write(f'SEED: {SEED}\n')
log_file.write(f'Test Size: {TEST_SIZE}\n')
log_file.flush()

# CURRENT_PATH = 'data/pso_raw/IMDB_used_data'
VOCAB_SIZE = 50000
dataset = my_file.load_pkl(dataset_path)
word_candidate = my_file.load_pkl_in_repo(word_candidates_path)
test_pos_tags = my_file.load_pkl_in_repo(pos_tags_path)

# Prevent returning 0 as most similar word because it is not part of the dictionary
max_len = 250
train_x = pad_sequences(dataset.train_seqs2, maxlen=max_len, padding='post')
train_y = np.array(dataset.train_y)
test_x = pad_sequences(dataset.test_seqs2, maxlen=max_len, padding='post')
test_y = np.array(dataset.test_y)

model = models.Model(dataset, model_path).cuda()
attacker = LSAttack(model, word_candidate)

all_test_num = len(dataset.test_y)
print(f'There are {all_test_num} test examples in total')
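# What follows is an illustrative sketch (not part of the original script) of the
# attack loop that typically comes after this setup: sample TEST_SIZE test examples
# under the fixed SEED, attack each one, and dump the successes in the
# (index, target, adversarial example) layout that the export script later reads
# from 'success_all.pkl'. The attacker.attack(x, target, pos_tags) call and
# my_file.save_pkl are assumptions about this repo's API, not confirmed interfaces.
np.random.seed(SEED)
sampled_idx = np.random.choice(all_test_num, size=TEST_SIZE, replace=False)

success_test_idx_list, success_target_list, success_eg_list = [], [], []
for test_idx in sampled_idx:
    x_orig = test_x[test_idx]
    target = 1 - test_y[test_idx]  # flip the binary sentiment label
    adv_eg = attacker.attack(x_orig, target, test_pos_tags[test_idx])  # assumed signature
    if adv_eg is not None:
        success_test_idx_list.append(test_idx)
        success_target_list.append(target)
        success_eg_list.append(adv_eg)
        log_file.write(f'{test_idx} succeeded\n')
        log_file.flush()

my_file.save_pkl((success_test_idx_list, success_target_list, success_eg_list),
                 my_file.real_path_of(SAVE_FOLDER, 'success_all.pkl'))  # save_pkl is assumed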
SAVE_FOLDER = f'out/pso_related/{dataset_name}_{model_name}_{tag}_search/{SEED}'
my_file.create_folder(SAVE_FOLDER)

# init log file
log_file = open(my_file.real_path_of(SAVE_FOLDER, 'log.txt'), 'w')
# save parameters
log_file.write(f'SEED: {SEED}\n')
log_file.write(f'Test Size: {TEST_SIZE}\n')
log_file.write(f'Pop size: {pop_size}\n')
log_file.write(f'Max Iteration: {max_iter}\n')
log_file.flush()

# CURRENT_PATH = 'data/pso_raw/IMDB_used_data'
VOCAB_SIZE = 50000
dataset = my_file.load_pkl(dataset_path)
word_candidate = my_file.load_pkl(word_candidates_path)
test_pos_tags = my_file.load_pkl(pos_tags_path)

# Prevent returning 0 as most similar word because it is not part of the dictionary
max_len = 250
train_x = pad_sequences(dataset.train_seqs2, maxlen=max_len, padding='post')
train_y = np.array(dataset.train_y)
test_x = pad_sequences(dataset.test_seqs2, maxlen=max_len, padding='post')
test_y = np.array(dataset.test_y)

model = models.Model(dataset, model_path).cuda()
pso_attacker = PSOAttack(model, word_candidate, dataset, max_iters=max_iter, pop_size=pop_size)
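# Hedged sketch of attacking a single example with the PSO attacker. The
# pso_attacker.attack(x_orig, target, pos_tags) signature and model.predict(...)
# are assumptions about this repo's interfaces; example_idx is a hypothetical
# index used only for illustration.
example_idx = 0
x_orig = test_x[example_idx]
target = 1 - test_y[example_idx]  # flip the binary label
adv_x = pso_attacker.attack(x_orig, target, test_pos_tags[example_idx])  # assumed signature
if adv_x is not None:
    adv_pred = np.argmax(model.predict(np.array([adv_x]))[0])  # assumed predict API
    log_file.write(f'example {example_idx}: target {target}, adversarial prediction {adv_pred}\n')
    log_file.flush()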
import sys
sys.path.append(sys.path[0] + '/../../')
import os

from tqdm import tqdm

from utils import my_file

# ==================== paras ========================
dataset_path = '/home/workspace/nlp_attack/data/pso_raw/SST_used_data/aux_files/dataset_13837.pkl'
# set the result folder path
folder_path = '/home/workspace/nlp_attack_ls_final/out/pso_related/SST2_BERT_LS_search/3333'

# ============ read dataset ========================
dataset = my_file.load_pkl(dataset_path)
all_test_num = len(dataset.test_y)

# ============ read attack result ================
success_test_idx_list, success_target_list, success_eg_list = \
    my_file.load_pkl(os.path.join(folder_path, 'success_all.pkl'))

# open save files
orig_plain_text_filename = 'orig.txt'
adv_plain_text_filename = 'adv.txt'
orig_txtfile = open(os.path.join(folder_path, orig_plain_text_filename), 'w')
adv_txtfile = open(os.path.join(folder_path, adv_plain_text_filename), 'w')

# iterate over the successful attacks only
for i, success_test_idx in tqdm(enumerate(success_test_idx_list)):
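    # Hedged sketch of a possible loop body (illustrative, not the original
    # code). It assumes the dataset object exposes an inv_full_dict id-to-word
    # mapping; the original sequence is taken from dataset.test_seqs2 and the
    # adversarial sequence from success_eg_list, then both are written out as
    # plain text, one sentence per line.
    orig_seq = dataset.test_seqs2[success_test_idx]
    adv_seq = success_eg_list[i]
    orig_text = ' '.join(dataset.inv_full_dict[w] for w in orig_seq if w != 0)  # inv_full_dict is assumed
    adv_text = ' '.join(dataset.inv_full_dict[w] for w in adv_seq if w != 0)
    orig_txtfile.write(orig_text + '\n')
    adv_txtfile.write(adv_text + '\n')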
model_name = 'BERT'
tag = 'PSO'
SAVE_FOLDER = f'out/pso_related/{dataset_name}_{model_name}_{tag}_search/{SEED}'
my_file.create_folder(SAVE_FOLDER)

# init log file
log_file = open(my_file.real_path_of(SAVE_FOLDER, 'log.txt'), 'w')
# save parameters
log_file.write(f'SEED: {SEED}\n')
log_file.write(f'Test Size: {TEST_SIZE}\n')
log_file.write(f'Pop size: {pop_size}\n')
log_file.write(f'Max Iteration: {max_iter}\n')
log_file.flush()

tokenizer = my_file.load_pkl(
    os.path.join(SNLI_data_folder_path, 'nli_tokenizer.pkl'))
word_candidate = my_file.load_pkl(
    os.path.join(SNLI_data_folder_path, 'word_candidates_sense.pkl'))
train, valid, test = my_file.load_pkl(
    os.path.join(SNLI_data_folder_path, 'all_seqs.pkl'))
test_pos_tags = my_file.load_pkl(
    os.path.join(SNLI_data_folder_path, 'pos_tags_test.pkl'))

test_s1 = [t[1:-1] for t in test['s1']]
test_s2 = [t[1:-1] for t in test['s2']]

vocab = {w: i for (w, i) in tokenizer.word_index.items()}
inv_vocab = {i: w for (w, i) in vocab.items()}

model = Model(inv_vocab, os.path.join(SNLI_data_folder_path, 'BERTModel.pt'))
adversary = PSOAttack(model,
with open('./nli_tokenizer.pkl', 'rb') as fh:
    tokenizer = pickle.load(fh)

# vocab = {w: i for (w, i) in tokenizer.word_index.items()}
inv_vocab = {i: w for (w, i) in tokenizer.word_index.items()}


def reconstruct(sent):
    word_list = [inv_vocab[w] for w in sent if w != 0]
    return ' '.join(word_list)


# load dataset
with open('./nli_testing.pkl', 'rb') as fh:
    test = pickle.load(fh)

# load result
test_idx_list = my_file.load_pkl(os.path.join(result_folder, 'test_list.pkl'))
success_idx_list, success_target_list, success_x_list = \
    my_file.load_pkl(os.path.join(result_folder, 'success.pkl'))
# self.long_fail_idx_list, self.long_fail_target_list, long_fail_x_list

orig_plain_text_filename = 'orig.txt'
adv_plain_text_filename = 'adv.txt'
orig_txtfile = open(os.path.join(result_folder, orig_plain_text_filename), 'w')
adv_txtfile = open(os.path.join(result_folder, adv_plain_text_filename), 'w')

for i, test_idx in enumerate(success_idx_list):
    # only read hypothesis
    orig_x1 = test[1][test_idx]
    orig_y = np.argmax(test[2][test_idx])
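    # Hedged sketch of how this loop could continue (illustrative, not the
    # original code): reconstruct() defined above maps id sequences back to
    # text, so the original hypothesis and its adversarial rewrite from
    # success_x_list can be written out side by side, one per line.
    orig_txtfile.write(reconstruct(orig_x1) + '\n')
    adv_txtfile.write(reconstruct(success_x_list[i]) + '\n')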
import sys
sys.path.append(sys.path[0] + '/../../')
import os

from tqdm import tqdm

from utils import my_file

# ======================== paras =========================================
result_folder = '/home/workspace/nlp_attack_ls_final/out/pso_related/SNLI_BiLSTM_LS_search/3333'
SNLI_DATA_PATH = '/home/workspace/nlp_attack/data/pso_raw/SNLI_used_data'

# ============ read dataset ========================
train, valid, test = my_file.load_pkl(
    os.path.join(SNLI_DATA_PATH, 'all_seqs.pkl'))
test_s1 = [t[1:-1] for t in test['s1']]
test_s2 = [t[1:-1] for t in test['s2']]

# read tokenizer
tokenizer = my_file.load_pkl(os.path.join(SNLI_DATA_PATH, 'nli_tokenizer.pkl'))
inv_vocab = {i: w for (w, i) in tokenizer.word_index.items()}


def reconstruct_text(x_):
    word_list = []
    for w_idx in x_:
        word_list.append(inv_vocab[w_idx])
    return " ".join(word_list)


# ============ read attack result ================
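# Hedged sketch of the result-loading step that would follow this header,
# mirroring the SNLI export script above (the file names and tuple layout are
# assumed to match that script, not confirmed for this folder):
test_idx_list = my_file.load_pkl(os.path.join(result_folder, 'test_list.pkl'))
success_idx_list, success_target_list, success_x_list = \
    my_file.load_pkl(os.path.join(result_folder, 'success.pkl'))
print(f'{len(success_idx_list)} / {len(test_idx_list)} attacked examples succeeded')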