def __init__(self, result_folder_in_repo, log_filename='log.txt'):

    # counters and per-example statistics for the attack run
    self.success_count = 0
    self.test_count = 0
    self.long_fail_count = 0
    self.query_num_list = []
    self.success_query_num_list = []
    self.real_success_modif_rate_list = []
    self.modif_rate_list = []

    if result_folder_in_repo is not None:
        my_file.create_folder(RESULT_FOLDER, result_folder_in_repo)
        self.log_file = open(
            my_file.real_path_of(RESULT_FOLDER, result_folder_in_repo,
                                 log_filename), 'w')
Example #2

def __init__(self, result_folder_in_repo, log_file_path=None):

    # counters for attack outcomes
    self.success_count = 0
    self.test_count = 0
    self.long_fail_count = 0

    # open the log file inside the repo output folder, or at an explicit
    # path; if both arguments are None, no log file is opened
    if result_folder_in_repo is not None:
        my_file.create_folder(PWWS_OUT_PATH, result_folder_in_repo)
        self.log_file = open(
            my_file.real_path_of(PWWS_OUT_PATH, result_folder_in_repo,
                                 'log.txt'), 'w')
    elif log_file_path is not None:
        self.log_file = open(log_file_path, 'w')

    # per-example statistics
    self.query_num_list = []
    self.success_query_num_list = []
    self.all_success_change_ratio_list = []
    self.change_ratio_list = []
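
The two variants above only initialize their counters. As a point of reference, here is a minimal sketch of how the log_attack_result and summary methods invoked in the later examples could maintain and report these fields; the bodies are an assumption modeled on the first variant's attribute names, not the repository's actual implementation.

def log_attack_result(self, is_attack_success, modif_rate, num_queries):
    # hypothetical implementation: one call per attacked example
    self.test_count += 1
    self.modif_rate_list.append(modif_rate)
    self.query_num_list.append(num_queries)
    if is_attack_success:
        self.success_count += 1
        self.success_query_num_list.append(num_queries)

def summary(self):
    # hypothetical implementation: aggregate statistics over all examples
    success_rate = self.success_count / max(self.test_count, 1)
    avg_queries = sum(self.query_num_list) / max(len(self.query_num_list), 1)
    self.log_file.write(f'attack success rate: {success_rate:.4f}\n')
    self.log_file.write(f'average queries: {avg_queries:.1f}\n')
    self.log_file.flush()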
Example #3
word_candidates_path = '/home/workspace/nlp_attack/data/pso_raw/IMDB_used_data/word_candidates_sense.pkl'
pos_tags_path = '/home/workspace/nlp_attack/data/pso_raw/IMDB_used_data/pos_tags_test.pkl'
model_path = '/home/workspace/nlp_attack/data/pso_raw/IMDB_used_data/BERTModel.pt'

# ===========================================

np.random.seed(SEED)

dataset_name = 'IMDB'
model_name = 'BERT'
tag = 'LS'
SAVE_FOLDER = f'out/pso_related/{dataset_name}_{model_name}_{tag}_search/{SEED}'
my_file.create_folder(SAVE_FOLDER)

# init log file
log_file = open(my_file.real_path_of(SAVE_FOLDER, 'log.txt'), 'w')

# save run parameters to the log
log_file.write(f'SEED: {SEED}\n')
log_file.write(f'Test Size: {TEST_SIZE}\n')
log_file.flush()
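
my_file is the repository's path-utility module and is not shown in these excerpts (likewise, SEED, TEST_SIZE, and dataset_path are defined earlier in the full script). Below is a minimal sketch of the helpers these examples rely on, assuming they simply resolve paths under a fixed repository root; load_pkl_in_repo presumably works analogously.

import os
import pickle

REPO_ROOT = os.path.dirname(os.path.abspath(__file__))  # assumption

def real_path_of(*parts):
    # join the given parts under the repository root
    return os.path.join(REPO_ROOT, *parts)

def create_folder(*parts):
    # create the folder (and any missing parents), ignoring existing ones
    os.makedirs(real_path_of(*parts), exist_ok=True)

def load_pkl(*parts):
    # unpickle a file located under the repository root
    with open(real_path_of(*parts), 'rb') as f:
        return pickle.load(f)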

# CURRENT_PATH = 'data/pso_raw/IMDB_used_data'
VOCAB_SIZE = 50000

dataset = my_file.load_pkl(dataset_path)
word_candidate = my_file.load_pkl_in_repo(word_candidates_path)
test_pos_tags = my_file.load_pkl_in_repo(pos_tags_path)

# index 0 is not a real dictionary entry, so it must never be returned as a
# most-similar word
max_len = 250
Example #4

            attack_result, num_queries, modif_rate, is_attack_success = morpheus.morph(qa,
                                                                                       context,
                                                                                       constrain_pos=True,
                                                                                       conservative=True)
            # record the attack result back into the SQuAD-style data structure
            data['data'][i]['paragraphs'][j]['qas'][k]['question'] = attack_result

            # a None modif_rate means the original question failed the
            # preconditions (answered correctly, length 10-100), so skip it
            if modif_rate is None:
                skip_num += 1
                continue

            # log result
            all_loggers.log_attack_result(is_attack_success, modif_rate, num_queries)
            if is_answerable:
                answerable_loggers.log_attack_result(is_attack_success, modif_rate, num_queries)

print(f'\ntotal questions: {all_q_num}, skipped: {skip_num}')

# summary
print('================= all attack summary =====================')
all_loggers.summary()

print('================= answerable attack summary =====================')
answerable_loggers.summary()


with open(my_file.real_path_of(RESULT_FOLDER, out_folder_name, f'{dataset_}_{model_}_{alg_}.json'), 'w') as f:
    json.dump(data, f, indent=4)
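
The data['data'][i]['paragraphs'][j]['qas'][k] indexing used above follows the standard SQuAD JSON layout, sketched here with illustrative values:

data = {
    'data': [                          # list of articles
        {
            'title': 'Example_Article',
            'paragraphs': [            # list of paragraphs per article
                {
                    'context': 'Some passage of text ...',
                    'qas': [           # list of questions per paragraph
                        {
                            'id': '0001',
                            'question': 'What does the passage say?',
                            'answers': [{'text': '...', 'answer_start': 0}],
                        },
                    ],
                },
            ],
        },
    ],
}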
Example #5
# test_len = []
# for i in range(SAMPLE_SIZE):
#     test_len.append(len(dataset.test_seqs2[test_idx[i]]))
# print('Shortest sentence in our test set is %d words' %np.min(test_len))

TEST_SIZE = None
test_size = len(dataset.test_y)
test_idx_list = np.arange(len(dataset.test_y))
# np.random.shuffle(test_idx_list)

test_list = []

cur_result_folder = f'{algo}_{dataset_name}/{SEED}'
my_file.create_folder(GA_OUT_PATH, cur_result_folder)
cur_log_file = open(
    my_file.real_path_of(GA_OUT_PATH, cur_result_folder, 'log.txt'), 'a')
cur_logger = GAIMDBLogger(cur_log_file)
cur_recorder = GARecorderIMDB()

st = time()

for test_idx in test_idx_list:
    x_orig = test_x[test_idx]
    orig_label = test_y[test_idx]
    orig_preds = model.predict(sess, x_orig[np.newaxis, :])[0]

    if np.argmax(orig_preds) != orig_label:
        print('skipping wrongly classified example ..')
        print('--------------------------')
        continue
    x_len = np.sum(np.sign(x_orig))
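
The last line works because the token sequences are zero-padded: np.sign maps every non-zero token id to 1 and padding to 0, so the sum is the number of real tokens. A quick standalone check:

import numpy as np

x = np.array([12, 5, 873, 4, 0, 0, 0])  # token ids, zero-padded to length 7
print(np.sum(np.sign(x)))               # 4 -> number of non-padding tokens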
Example #6
            # record the attack result back into the SQuAD-style data structure
            data['data'][i]['paragraphs'][j]['qas'][k][
                'question'] = attack_result

            # a None modif_rate means the original question failed the
            # preconditions (answered correctly, length 10-100), so skip it
            if modif_rate is None:
                skip_num += 1
                continue

            # log result
            # all_loggers.log_attack_result(is_attack_success, modif_rate, num_queries)
            # if is_answerable:
            answerable_loggers.log_attack_result(is_attack_success, modif_rate,
                                                 num_queries)
    # break

print(f'\ntotal questions: {all_q_num}, skipped: {skip_num}')

# summary
# print('================= all attack summary =====================')
# all_loggers.summary()

print('================= answerable attack summary =====================')
answerable_loggers.summary()

with open(
        my_file.real_path_of(RESULT_FOLDER, out_folder_name,
                             f'{dataset_}_{model_type}_{alg_}.json'),
        'w') as f:
    json.dump(data, f, indent=4)
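
For later evaluation, the dumped file can be read back into the same nested structure; a minimal sketch reusing the name variables from the snippet above:

import json

adv_path = my_file.real_path_of(RESULT_FOLDER, out_folder_name,
                                f'{dataset_}_{model_type}_{alg_}.json')
with open(adv_path) as f:
    adv_data = json.load(f)

# every 'question' field now holds the attacked text written above
first_q = adv_data['data'][0]['paragraphs'][0]['qas'][0]['question']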