def run_fuzzing(dataset_name,
                model,
                x_train,
                y_train,
                x_test,
                y_test,
                model_layer,
                folder_to_store,
                order_numbers=10):
    """Fuzz the training set chunk by chunk and store the accepted mutants.

    The training set is processed in ``order_numbers`` consecutive chunks of
    3000 samples. For every chunk a dict ``{sample index: mutated image}`` is
    written to ``<folder_to_store>nc_index_<order_number>.npy``. A chunk whose
    file already exists is skipped, so an interrupted run can be resumed.

    A mutant is kept when (1) the model's predicted class for the mutant
    differs from its prediction for the seed image and (2) ``compare_nc``
    returns a truthy value for the (mutant, seed) pair.

    Args:
        dataset_name: Forwarded to ``mutate`` as its second argument.
        model: Object with a ``predict`` method, also forwarded to
            ``compare_nc``.
        x_train: Seed images; indexed by integer position.
        y_train, x_test, y_test, model_layer: Forwarded unchanged to
            ``compare_nc``.
        folder_to_store: Path *prefix* of the output files. It is concatenated
            directly with the file name, so it must end with a path separator
            to denote a directory.
        order_numbers: Number of 3000-sample chunks to process.

    Returns:
        None. Results are written to disk; the number of accepted mutants of
        each chunk is printed.
    """
    chunk_size = 3000
    progress_step = 1000
    banner = ("-------------------------------------THIS IS {}"
              "-------------------------------------")
    total = len(x_train)

    for order_number in range(0, order_numbers):
        file_path = '{}nc_index_{}.npy'.format(folder_to_store, order_number)
        # only perform fuzzing if the file does not exist
        if os.path.exists(file_path):
            continue

        # Create the output directory *before* the expensive loop so that a
        # missing folder cannot make np.save fail after all the work is done.
        target_dir = os.path.dirname(file_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        nc_index = {}
        nc_number = 0
        # Clamp the chunk to the data set; a chunk that starts past the end
        # is empty and produces an empty index file.
        lower_bound = min(chunk_size * order_number, total)
        upper_bound = min(chunk_size * (order_number + 1), total)

        for i in range(lower_bound, upper_bound):
            new_image = mutate(x_train[i], dataset_name)

            # Progress banner every `progress_step` samples of the chunk.
            # (It used to be keyed to a 5000-sample stride that did not match
            # the 3000-sample chunks, so it almost never fired.)
            offset = i - lower_bound
            if offset and offset % progress_step == 0:
                print(banner.format(i))

            mutant_label = softmax(
                model.predict(np.expand_dims(new_image,
                                             axis=0))).argmax(axis=-1)
            seed_label = softmax(
                model.predict(np.expand_dims(x_train[i],
                                             axis=0))).argmax(axis=-1)
            # Only mutants that change the predicted class are candidates.
            if not (mutant_label != seed_label):
                continue

            # presumably truthy when the mutant increases neuron coverage --
            # confirm against compare_nc
            if compare_nc(model, x_train, y_train, x_test, y_test, new_image,
                          x_train[i], model_layer):
                nc_index[i] = new_image
                nc_number += 1

        print(nc_number)
        np.save(file_path, nc_index)
# Fuzz one 3000-sample chunk of the training set (selected by
# `order_number`) and persist the accepted mutants.
# `nc_index`, `order_number`, `dataset`, `model_name`, `model` and the data
# arrays are expected to be defined by the surrounding code.
nc_number = 0
for i in range(3000 * order_number, 3000 * (order_number + 1)):
    new_image = mutate(x_train[i], dataset)

    # NOTE(review): the banner positions use a 5000-sample stride while the
    # chunk is 3000 samples wide -- confirm this is intended.
    if i in (5000 * order_number + 1000, 5000 * order_number + 3000):
        print(
            "-------------------------------------THIS IS {}-------------------------------------"
            .format(i))

    mutant_label = softmax(
        model.predict(np.expand_dims(new_image, axis=0))).argmax(axis=-1)
    seed_label = softmax(
        model.predict(np.expand_dims(x_train[i], axis=0))).argmax(axis=-1)
    # Skip mutants that keep the predicted class.
    if not (mutant_label != seed_label):
        continue

    nc_symbol = compare_nc(model, x_train, y_train, x_test, y_test,
                           new_image, x_train[i], model_layer)
    if nc_symbol == True:
        nc_index[i] = new_image
        nc_number += 1

print(nc_number)

### save data
folder_to_store = 'fuzzing/{}/{}/'.format(dataset, model_name)
os.makedirs(folder_to_store, exist_ok=True)
target_file = folder_to_store + '/nc_index_{}.npy'.format(order_number)
np.save(target_file, nc_index)
# Walk the test set in batches of `step` samples between `lower_bound` and
# `upper_bound`, keep every mutant for which compare_nc reports True, store
# the resulting index, then overlay the stored mutants onto x_test.
# `nc_index`, `nc_number`, `new_images`, `step`, the bounds and the paths are
# expected to be defined by the surrounding code.
for i in tqdm(range(lower_bound, upper_bound, step), desc="Total progress:"):
    left_idx = i
    right_idx = min(i + step, upper_bound)

    labels_new = softmax(
        model.predict(np.array(
            new_images[left_idx:right_idx]))).argmax(axis=-1)
    labels_old = softmax(
        model.predict(x_test[left_idx:right_idx])).argmax(axis=-1)

    # NOTE(review): the two predicted labels are not compared here; every
    # sample of the batch is handed to compare_nc.
    for index, (pred_new, pred_old) in enumerate(zip(labels_new, labels_old)):
        position = i + index
        nc_symbol = compare_nc(model, x_train, y_train, x_test, y_test,
                               new_images[position], x_test[position],
                               model_layer)
        if nc_symbol == True:
            nc_index[position] = new_images[position]
            nc_number += 1

print("Log: new image can cover more neurons: {}".format(nc_number))
np.save(nc_index_path, nc_index)

# Replace the seeds in x_test by the mutants stored in the two index files.
for order_number in range(2):
    index_file = os.path.join(store_path,
                              'nc_index_test_{}.npy'.format(order_number))
    index = np.load(index_file, allow_pickle=True).item()
    for y, x in index.items():
        x_test[y] = x
def cycle(T: int) -> None:
    """Run round ``T`` of the fuzz / augment / evaluate / retrain loop.

    Reads the module-level names ``THIS_MODEL_DIR``, ``dataset_name``,
    ``model_name``, ``is_improve``, ``improve_coverage``, ``model_layer`` and
    ``l``.

    Steps:
        1. Load the base model (file ``0.h5``).
        2. Build the mutated test set ``new_test/.../x_test_new.npy`` once.
           For ``T != 0``: mutate the training set once (cached in
           ``new_images.npy``), replay the mutants accepted in rounds
           ``1..T-1`` and select the mutants of round ``T`` from the training
           samples ``[5000 * (T - 1), 5000 * T)``.
        3. Append the mutants of round ``T`` to the training data.
        4. Evaluate coverage, retrain and save ``<T>.h5`` (``T != 0`` only),
           then evaluate robustness on the mutated test set.

    Args:
        T: Round number. ``0`` only evaluates the base model.

    Returns:
        None. Artifacts are written below ``fuzzing/`` and ``new_test/`` and
        the retrained model is saved next to the base model.
    """

    def _append_mutants(x_data, y_data, index):
        """Append the mutants of ``index`` ({seed position: image}) in one go.

        Each mutant inherits the label of its seed. Concatenating once per
        round avoids re-copying the whole training set for every mutant.
        """
        if not index:
            return x_data, y_data
        positions = list(index.keys())
        mutants = np.stack([index[p] for p in positions], axis=0)
        return (np.concatenate((x_data, mutants), axis=0),
                np.concatenate((y_data, y_data[positions]), axis=0))

    # Step 1. Load the current model M_i
    # NOTE: every round starts from the base model (index 0), not from the
    # model produced by round T - 1.
    current_model_path = "{}{}/{}/{}/{}.h5".format(THIS_MODEL_DIR,
                                                   dataset_name, model_name,
                                                   is_improve, str(0))
    current_model = load_model(current_model_path)

    # Step 2. According to the current M_i and dataset, generate examples T_i
    ## Load the current dataset we have
    x_train, y_train, x_test, y_test = load_data(dataset_name)

    store_path = 'new_test/{}/{}'.format(dataset_name, model_name)
    x_test_new_path = os.path.join(store_path, 'x_test_new.npy')
    if not os.path.exists(x_test_new_path):
        print("Generate test set")
        test_mutants = [
            mutate(image)
            for image in tqdm(x_test, desc="transformation ......")
        ]

        # Work on a copy: x_test must stay untouched because it is used
        # further down as the clean test set.
        x_test_new = np.array(x_test, copy=True)
        nc_number = 0
        # Unlike the training-set selection below, no misclassification
        # filter is applied here; only compare_nc decides.
        for position in tqdm(range(len(x_test)), desc="Total progress:"):
            if compare_nc(current_model, x_train, y_train, x_test, y_test,
                          test_mutants[position], x_test[position],
                          model_layer):
                x_test_new[position] = test_mutants[position]
                nc_number += 1
        print("Log: new image can cover more neurons: {}".format(nc_number))

        os.makedirs(store_path, exist_ok=True)
        np.save(x_test_new_path, x_test_new)

    data_folder = 'fuzzing/{}/{}/{}'.format(dataset_name, model_name,
                                            is_improve)
    os.makedirs(data_folder, exist_ok=True)

    if T != 0:
        new_images_path = os.path.join(data_folder, "new_images.npy")
        if not os.path.exists(new_images_path):
            print("Log: Start do transformation in images")
            new_images = [mutate(image) for image in tqdm(x_train)]
            np.save(new_images_path, new_images)
        else:
            print("Log: Load mutantions.")
            new_images = np.load(new_images_path)

        # Replay the mutants accepted in the earlier rounds.
        # The index files are pickled dicts written by this function itself;
        # do not point this at untrusted files.
        for earlier_round in range(1, T):
            index = np.load(os.path.join(
                data_folder, 'nc_index_{}.npy'.format(earlier_round)),
                            allow_pickle=True).item()
            x_train, y_train = _append_mutants(x_train, y_train, index)

        round_index_path = os.path.join(data_folder,
                                        'nc_index_{}.npy'.format(T))
        if not os.path.exists(round_index_path):
            ## Generate new examples
            nc_index = {}
            nc_number = 0
            for i in tqdm(range(5000 * (T - 1), 5000 * (T), 500),
                          desc="Total progress:"):
                batch_new = np.array(new_images[i:i + 500])
                batch_old = x_train[i:i + 500]
                if len(batch_new) == 0 or len(batch_old) == 0:
                    # Ran past the end of the data; nothing left to predict.
                    break

                labels_new = softmax(
                    current_model.predict(batch_new)).argmax(axis=-1)
                labels_old = softmax(
                    current_model.predict(batch_old)).argmax(axis=-1)
                for index, (pred_new, pred_old) in enumerate(
                        zip(labels_new, labels_old)):
                    # find an adversarial example
                    if pred_new == pred_old:
                        continue
                    nc_symbol = compare_nc(current_model, x_train, y_train,
                                           x_test, y_test,
                                           new_images[i + index],
                                           x_train[i + index], model_layer)
                    # Keep the mutant when its coverage effect matches the
                    # experiment: coverage-improving mutants when
                    # improve_coverage is set, non-improving ones otherwise.
                    if bool(nc_symbol) == bool(improve_coverage):
                        nc_index[i + index] = new_images[i + index]
                        nc_number += 1

            print("Log: new image can/cannot cover more neurons: {}".format(
                nc_number))
            np.save(round_index_path, nc_index)

        # Step 3. Retrain M_i against T_i, to obtain M_{i+1}
        ## Augment the newly generate examples into the training data
        index = np.load(round_index_path, allow_pickle=True).item()
        x_train, y_train = _append_mutants(x_train, y_train, index)

    # Step 4. Evaluate the current model
    ## Evaluate coverage
    print(x_train.shape)
    print("\nEvaluate coverage ......")
    evaluate_coverage(current_model, l, T, x_train, y_train, x_test, y_test)

    ## Retrain the model
    if T != 0:
        retrained_model = retrain(current_model,
                                  x_train,
                                  y_train,
                                  x_test,
                                  y_test,
                                  batch_size=128,
                                  epochs=5)
        new_model_path = "{}{}/{}/{}/{}.h5".format(THIS_MODEL_DIR,
                                                   dataset_name, model_name,
                                                   is_improve, str(T))
        retrained_model.save(new_model_path)

    ## Evaluate robustness
    print("\nEvaluate robustness ......")
    x_test_new = np.load(x_test_new_path, allow_pickle=True)
    evaluate_robustness(T, current_model, x_test, y_test, x_test_new)
    print("Done\n")
def cycle(T: int) -> None:
    """Run round ``T`` (``T >= 1``) of the fuzz / retrain / evaluate loop.

    Reads the module-level names ``THIS_MODEL_DIR``, ``dataset_name``,
    ``model_name``, ``is_improve``, ``improve_coverage``, ``model_layer`` and
    ``l``.

    Steps:
        1. Load the model saved by round ``T - 1``.
        2. Rebuild the training set of the earlier rounds, then mutate the
           training samples ``[5000 * (T - 1), 5000 * T)`` one by one and keep
           the misclassified mutants whose coverage effect matches
           ``improve_coverage``. The result is stored as
           ``fuzzing/<dataset>/<model>/<is_improve>/nc_index_<T>.npy``.
        3. Append the kept mutants to the training data, retrain and save the
           model as ``<T>.h5``.
        4. Evaluate coverage and robustness of the retrained model.

    Args:
        T: Round number, must be positive.

    Returns:
        None.

    Raises:
        AssertionError: If ``T`` is not positive.
    """
    assert T > 0

    def _append_mutants(x_data, y_data, index):
        """Append the mutants of ``index`` ({seed position: image}) in one go.

        Each mutant inherits the label of its seed. Concatenating once per
        round avoids re-copying the whole training set for every mutant.
        """
        if not index:
            return x_data, y_data
        positions = list(index.keys())
        mutants = np.stack([index[p] for p in positions], axis=0)
        return (np.concatenate((x_data, mutants), axis=0),
                np.concatenate((y_data, y_data[positions]), axis=0))

    # Step 1. Load the current model M_i
    current_model_path = "{}{}/{}/{}/{}.h5".format(THIS_MODEL_DIR,
                                                   dataset_name, model_name,
                                                   is_improve, str(T - 1))
    current_model = load_model(current_model_path)

    # Step 2. According to the current M_i and dataset, generate examples T_i
    ## Load the current dataset we have
    x_train, y_train, x_test, y_test = load_data(dataset_name)

    data_folder = 'fuzzing/{}/{}/{}'.format(dataset_name, model_name,
                                            is_improve)
    os.makedirs(data_folder, exist_ok=True)

    # Replay rounds 1..T-1 from the files this function itself writes. The
    # previous code read 'fuzzing/nc_index_<0..T-2>.npy', which is neither
    # the location nor the numbering used when a round is saved below.
    # The index files are pickled dicts; do not point this at untrusted files.
    for earlier_round in range(1, T):
        index = np.load(os.path.join(
            data_folder, 'nc_index_{}.npy'.format(earlier_round)),
                        allow_pickle=True).item()
        x_train, y_train = _append_mutants(x_train, y_train, index)

    ## Generate new examples
    nc_index = {}
    nc_number = 0
    lower_bound = 5000 * (T - 1)
    # Clamp so that a window reaching past the data does not raise IndexError.
    upper_bound = min(5000 * (T), len(x_train))
    for i in range(lower_bound, upper_bound):
        new_image = mutate(x_train[i])
        if i % 100 == 0:
            # Progress dot only. A stray `break` used to follow this print
            # and ended the loop on its very first iteration.
            print('.', end='')

        mutant_label = softmax(
            current_model.predict(np.expand_dims(new_image,
                                                 axis=0))).argmax(axis=-1)
        seed_label = softmax(
            current_model.predict(np.expand_dims(x_train[i],
                                                 axis=0))).argmax(axis=-1)
        if mutant_label != seed_label:
            # find an adversarial example
            nc_symbol = compare_nc(current_model, x_train, y_train, x_test,
                                   y_test, new_image, x_train[i], model_layer)
            # Keep the mutant when its coverage effect matches the
            # experiment: coverage-improving mutants when improve_coverage is
            # set, non-improving ones otherwise.
            if bool(nc_symbol) == bool(improve_coverage):
                nc_index[i] = new_image
                nc_number += 1

    print(nc_number)
    np.save(os.path.join(data_folder, 'nc_index_{}.npy'.format(T)), nc_index)

    # Step 3. Retrain M_i against T_i, to obtain M_{i+1}
    ## Augment the newly generate examples into the training data
    # nc_index is exactly what was just written to disk, so it is used
    # directly instead of being loaded back.
    x_train, y_train = _append_mutants(x_train, y_train, nc_index)

    ## Retrain the model
    retrained_model = retrain(current_model,
                              x_train,
                              y_train,
                              x_test,
                              y_test,
                              batch_size=128,
                              epochs=5)
    new_model_path = "{}{}/{}/{}/{}.h5".format(THIS_MODEL_DIR, dataset_name,
                                               model_name, is_improve, str(T))
    retrained_model.save(new_model_path)

    # Step 4. Evaluate the current model
    ## Evaluate coverage
    evaluate_coverage(retrained_model, l, T, x_train, y_train, x_test, y_test)

    ## Evaluate robustness
    store_path = 'new_test/{}/{}'.format(dataset_name, model_name)
    x_test_new = np.load(os.path.join(store_path, 'x_test_new.npy'))
    evaluate_robustness(T, retrained_model, x_test, y_test, x_test_new)
    print("Done")