def __init__(self, result_folder_in_repo, log_filename='log.txt'):
    """Initialize attack-statistics accumulators and (optionally) a log file.

    Args:
        result_folder_in_repo: sub-folder under ``RESULT_FOLDER`` to create and
            log into; when ``None`` no folder is created and no file is opened
            (``self.log_file`` is then left unset).
        log_filename: name of the log file inside that folder.
    """
    # Scalar counters for the attack run.
    self.success_count = self.test_count = self.long_fail_count = 0
    # Per-example records accumulated during the run.
    self.query_num_list = []
    self.success_query_num_list = []
    self.real_success_modif_rate_list = []
    self.modif_rate_list = []
    # Guard clause: nothing to open when no result folder was requested.
    if result_folder_in_repo is None:
        return
    my_file.create_folder(RESULT_FOLDER, result_folder_in_repo)
    log_path = my_file.real_path_of(RESULT_FOLDER, result_folder_in_repo, log_filename)
    self.log_file = open(log_path, 'w')
def __init__(self, result_folder_in_repo, log_file_path=None):
    """Initialize PWWS attack-statistics accumulators and a log destination.

    Args:
        result_folder_in_repo: sub-folder under ``PWWS_OUT_PATH``; when given,
            it is created and ``log.txt`` inside it is opened for writing.
        log_file_path: explicit log-file path, used only when
            ``result_folder_in_repo`` is ``None``. If both are ``None``,
            ``self.log_file`` is left unset.
    """
    # Scalar counters for the attack run.
    self.success_count = self.test_count = self.long_fail_count = 0
    # Per-example records accumulated during the run.
    self.query_num_list = []
    self.success_query_num_list = []
    self.all_success_change_ratio_list = []
    self.change_ratio_list = []
    # Choose the log destination: repo folder takes precedence over a raw path.
    if result_folder_in_repo is not None:
        my_file.create_folder(PWWS_OUT_PATH, result_folder_in_repo)
        log_path = my_file.real_path_of(PWWS_OUT_PATH, result_folder_in_repo, 'log.txt')
        self.log_file = open(log_path, 'w')
    elif log_file_path is not None:
        self.log_file = open(log_file_path, 'w')
# --- PSO/IMDB search script setup: paths, seed, output folder, data loading ---
# NOTE(review): hard-coded absolute paths; presumably SEED, TEST_SIZE, dataset_path,
# np and my_file are defined earlier in the full file — confirm against it.
word_candidates_path = '/home/workspace/nlp_attack/data/pso_raw/IMDB_used_data/word_candidates_sense.pkl'
pos_tags_path = '/home/workspace/nlp_attack/data/pso_raw/IMDB_used_data/pos_tags_test.pkl'
model_path = '/home/workspace/nlp_attack/data/pso_raw/IMDB_used_data/BERTModel.pt'
# ===========================================
np.random.seed(SEED)
dataset_name = 'IMDB'
model_name = 'BERT'
tag = 'LS'
# Output folder is keyed by dataset/model/tag and the random seed.
SAVE_FOLDER = f'out/pso_related/{dataset_name}_{model_name}_{tag}_search/{SEED}'
my_file.create_folder(SAVE_FOLDER)
# init log file
log_file = open(my_file.real_path_of(SAVE_FOLDER, 'log.txt'), 'w')
# save parameters to the log header
log_file.write(f'SEED: {SEED}\n')
log_file.write(f'Test Size: {TEST_SIZE}\n')
log_file.flush()
# CURRENT_PATH = 'data/pso_raw/IMDB_used_data'
VOCAB_SIZE = 50000
# Load the pickled dataset, substitution candidates and POS tags.
dataset = my_file.load_pkl(dataset_path)
word_candidate = my_file.load_pkl_in_repo(word_candidates_path)
test_pos_tags = my_file.load_pkl_in_repo(pos_tags_path)
# Prevent returning 0 as most similar word because it is not part of the dictionary
max_len = 250
attack_result, num_queries, modif_rate, is_attack_success = morpheus.morph(qa, context, constrain_pos=True, conservative=True) # recorde result data['data'][i]['paragraphs'][j]['qas'][k]['question'] = attack_result # check if original question satisfies the requirements: 1. correct 2. length 10-100 if modif_rate is None: skip_num += 1 continue # log result all_loggers.log_attack_result(is_attack_success, modif_rate, num_queries) if is_answerable: answerable_loggers.log_attack_result(is_attack_success, modif_rate, num_queries) print(f'\n all number: {all_q_num}, skip number: {skip_num}') # summary print('================= all attack summary =====================') all_loggers.summary() print('================= answerable attack summary =====================') answerable_loggers.summary() with open(my_file.real_path_of(RESULT_FOLDER, out_folder_name, f'{dataset_}_{model_}_{alg_}.json'), 'w') as f: json.dump(data, f, indent=4)
# test_len = [] # for i in range(SAMPLE_SIZE): # test_len.append(len(dataset.test_seqs2[test_idx[i]])) # print('Shortest sentence in our test set is %d words' %np.min(test_len)) TEST_SIZE = None test_size = len(dataset.test_y) test_idx_list = np.arange(len(dataset.test_y)) # np.random.shuffle(test_idx_list) test_list = [] cur_result_folder = f'{algo}_{dataset_name}/{SEED}' my_file.create_folder(GA_OUT_PATH, cur_result_folder) cur_log_file = open( my_file.real_path_of(GA_OUT_PATH, cur_result_folder, 'log.txt'), 'a') cur_logger = GAIMDBLogger(cur_log_file) cur_recorder = GARecorderIMDB() st = time() for test_idx in test_idx_list: x_orig = test_x[test_idx] orig_label = test_y[test_idx] orig_preds = model.predict(sess, x_orig[np.newaxis, :])[0] if np.argmax(orig_preds) != orig_label: print('skipping wrong classifed ..') print('--------------------------') continue x_len = np.sum(np.sign(x_orig))
# recorde result data['data'][i]['paragraphs'][j]['qas'][k][ 'question'] = attack_result # check if original question satisfies the requirements: 1. correct 2. length 10-100 if modif_rate is None: skip_num += 1 continue # log result # all_loggers.log_attack_result(is_attack_success, modif_rate, num_queries) # if is_answerable: answerable_loggers.log_attack_result(is_attack_success, modif_rate, num_queries) # break print(f'\n all number: {all_q_num}, skip number: {skip_num}') # summary # print('================= all attack summary =====================') # all_loggers.summary() print('================= answerable attack summary =====================') answerable_loggers.summary() with open( my_file.real_path_of(RESULT_FOLDER, out_folder_name, f'{dataset_}_{model_type}_{alg_}.json'), 'w') as f: json.dump(data, f, indent=4)