def test_modify_clean_dataset_insertMode2(self):
    """Insert-mode pipeline test: each clean ramp image should come out as
    (clean * mul_val) merged with a trigger whose value is trigger_val + add_val.

    Exercises XFormMergePipeline.modify_clean_image_dataset end-to-end with
    dummy trigger/transform/merge objects, then checks every output file.
    """
    # test configuration
    num_images = 10
    num_datapoints_per_image = 10
    trigger_val = 7
    add_val = 3
    mul_val = 2

    # create the "clean" dataset: one 1-D ramp image per file, indexed by a CSV
    dd_list = []
    for ii in range(num_images):
        data = np.arange(ii, ii + num_datapoints_per_image)
        data_fname = 'file_' + str(ii) + '.png'
        cv2.imwrite(os.path.join(self.clean_dataset_rootdir, data_fname), data)
        dd_list.append({'file': data_fname})
    clean_df = pd.DataFrame(dd_list)
    clean_csv_fname = 'data.csv'
    clean_df.to_csv(os.path.join(self.clean_dataset_rootdir, clean_csv_fname), index=None)

    # NOTE: the original created an unused RandomState(1234) here; removed since
    # the pipeline call below does not take it.
    mod_cfg = \
        XFormMergePipelineConfig(trigger_list=[DummyTrigger(num_elem=num_datapoints_per_image,
                                                            val=trigger_val)],
                                 trigger_xforms=[DummyTransform_Add(add_val)],
                                 trigger_bg_xforms=[DummyTransform_Multiply(mul_val)],
                                 trigger_bg_merge=DummyMerge(),
                                 trigger_bg_merge_xforms=[],
                                 merge_type='insert',
                                 per_class_trigger_frac=None)

    # run the modification function
    mod_output_rootdir = os.path.join(self.clean_dataset_rootdir, 'modified')
    mod_output_subdir = os.path.join(mod_output_rootdir, 'subdir')
    XFormMergePipeline.modify_clean_image_dataset(self.clean_dataset_rootdir, clean_csv_fname,
                                                  mod_output_rootdir, mod_output_subdir,
                                                  mod_cfg, method='insert')

    # compare results w/ expected: background is multiplied by mul_val, then the
    # (trigger_val + add_val) trigger is merged in additively by DummyMerge
    for ii in range(num_images):
        fname = 'file_' + str(ii) + '.png'
        triggered_data_fp = os.path.join(mod_output_rootdir, mod_output_subdir, fname)
        triggered_data = np.reshape(
            GenericImageEntity(cv2.imread(triggered_data_fp, -1)).get_data(),
            (num_datapoints_per_image,))
        expected_data = np.arange(ii, ii + num_datapoints_per_image) * mul_val + \
            trigger_val + add_val
        self.assertTrue(np.allclose(triggered_data, expected_data))
def generate_mnist_experiment(train, test, output, train_output_csv_file, test_output_csv_file):
    """Generate clean and alpha-triggered MNIST datasets and experiment CSVs.

    :param train: path to the input MNIST train CSV file
    :param test: path to the input MNIST test CSV file
    :param output: top-level folder into which all generated data is written
    :param train_output_csv_file: filename of the generated clean train CSV
    :param test_output_csv_file: filename of the generated clean test CSV
    :raises FileNotFoundError: if either input CSV file does not exist
    """
    logger.info("Generating experiment...")

    # Setup the files based on user inputs
    train_csv_file = os.path.abspath(train)
    test_csv_file = os.path.abspath(test)
    if not os.path.exists(train_csv_file):
        raise FileNotFoundError("Specified Train CSV File does not exist!")
    if not os.path.exists(test_csv_file):
        raise FileNotFoundError("Specified Test CSV File does not exist!")
    toplevel_folder = output

    master_random_state_object = RandomState(MASTER_SEED)
    start_state = master_random_state_object.get_state()

    # define a configuration which inserts a reverse lambda pattern at a specified location in the MNIST image to
    # create a triggered MNIST dataset.  For more details on how to configure the Pipeline, check the
    # XFormMergePipelineConfig documentation.  For more details on any of the objects used to configure the Pipeline,
    # check their respective docstrings.
    one_channel_alpha_trigger_cfg = \
        tdc.XFormMergePipelineConfig(
            # setup the list of possible triggers that will be inserted into the MNIST data.  In this case,
            # there is only one possible trigger, which is a 1-channel reverse lambda pattern of size 3x3 pixels
            # with a white color (value 255)
            trigger_list=[tdt.ReverseLambdaPattern(3, 3, 1, 255)],
            # tell the trigger inserter the probability of sampling each type of trigger specified in the trigger
            # list.  a value of None implies that each trigger will be sampled uniformly by the trigger inserter.
            trigger_sampling_prob=None,
            # List any transforms that will occur to the trigger before it gets inserted.  In this case, we do none.
            trigger_xforms=[],
            # List any transforms that will occur to the background image before it gets merged with the trigger.
            # Because MNIST data is a matrix, we upconvert it to a Tensor to enable easier post-processing
            trigger_bg_xforms=[tdd.ToTensorXForm()],
            # List how we merge the trigger and the background.  Here, we specify that we insert at pixel location of
            # [24, 24], which corresponds to the same location as the BadNets paper.
            trigger_bg_merge=tdi.InsertAtLocation(np.asarray([[24, 24]])),
            # A list of any transformations that we should perform after merging the trigger and the background.
            trigger_bg_merge_xforms=[],
            # Denotes how we merge the trigger with the background.  In this case, we insert the trigger into the
            # image.  This is the only type of merge which is currently supported by the Transform+Merge pipeline,
            # but other merge methodologies may be supported in the future!
            merge_type='insert',
            # Specify that 25% of the clean data will be modified.  Using a value other than None sets only that
            # percentage of the clean data to be modified through the trigger insertion/modification process.
            # (The original comment said 15%, which disagreed with the 0.25 value below.)
            per_class_trigger_frac=0.25
        )

    ############# Create the data ############
    # create the clean data
    clean_dataset_rootdir = os.path.join(toplevel_folder, 'mnist_clean')
    master_random_state_object.set_state(start_state)
    mnist.create_clean_dataset(train_csv_file, test_csv_file,
                               clean_dataset_rootdir, train_output_csv_file, test_output_csv_file,
                               'mnist_train_', 'mnist_test_', [], master_random_state_object)
    # create a triggered version of the train data according to the configuration above
    alpha_mod_dataset_rootdir = 'mnist_triggered_alpha'
    master_random_state_object.set_state(start_state)
    tdx.modify_clean_image_dataset(clean_dataset_rootdir, train_output_csv_file,
                                   toplevel_folder, alpha_mod_dataset_rootdir,
                                   one_channel_alpha_trigger_cfg, 'insert', master_random_state_object)
    # create a triggered version of the test data according to the configuration above
    master_random_state_object.set_state(start_state)
    tdx.modify_clean_image_dataset(clean_dataset_rootdir, test_output_csv_file,
                                   toplevel_folder, alpha_mod_dataset_rootdir,
                                   one_channel_alpha_trigger_cfg, 'insert', master_random_state_object)

    ############# Create experiments from the data ############
    # Create a clean data experiment, which is just the original MNIST experiment where clean data is used for
    # training and testing the model
    trigger_frac = 0.0
    trigger_behavior = tdb.WrappedAdd(1, 10)
    e = tde.ClassicExperiment(toplevel_folder, trigger_behavior)
    train_df = e.create_experiment(os.path.join(toplevel_folder, 'mnist_clean', 'train_mnist.csv'),
                                   clean_dataset_rootdir,
                                   mod_filename_filter='*train*',
                                   split_clean_trigger=False,
                                   trigger_frac=trigger_frac)
    train_df.to_csv(os.path.join(toplevel_folder, 'mnist_clean_experiment_train.csv'), index=None)
    test_clean_df, test_triggered_df = e.create_experiment(os.path.join(toplevel_folder,
                                                                        'mnist_clean', 'test_mnist.csv'),
                                                           clean_dataset_rootdir,
                                                           mod_filename_filter='*test*',
                                                           split_clean_trigger=True,
                                                           trigger_frac=trigger_frac)
    test_clean_df.to_csv(os.path.join(toplevel_folder, 'mnist_clean_experiment_test_clean.csv'), index=None)
    test_triggered_df.to_csv(os.path.join(toplevel_folder, 'mnist_clean_experiment_test_triggered.csv'), index=None)

    # Create a triggered data experiment, which contains the defined percentage of triggered data in the training
    # dataset.  The remaining training data is clean data.  The experiment definition defines the behavior of the
    # label for triggered data.  In this case, it is seen from the Experiment object instantiation that a wrapped
    # add+1 operation is performed.
    # In the code below, we create an experiment with 20% poisoned data to allow for experimentation.
    # (The original comment said 10%, which disagreed with the 0.2 value below.)
    trigger_frac = 0.2
    train_df = e.create_experiment(os.path.join(toplevel_folder, 'mnist_clean', 'train_mnist.csv'),
                                   os.path.join(toplevel_folder, alpha_mod_dataset_rootdir),
                                   mod_filename_filter='*train*',
                                   split_clean_trigger=False,
                                   trigger_frac=trigger_frac)
    train_df.to_csv(os.path.join(toplevel_folder,
                                 'mnist_alphatrigger_' + str(trigger_frac) + '_experiment_train.csv'),
                    index=None)
    test_clean_df, test_triggered_df = e.create_experiment(os.path.join(toplevel_folder, 'mnist_clean',
                                                                        'test_mnist.csv'),
                                                           os.path.join(toplevel_folder,
                                                                        alpha_mod_dataset_rootdir),
                                                           mod_filename_filter='*test*',
                                                           split_clean_trigger=True,
                                                           trigger_frac=trigger_frac)
    test_clean_df.to_csv(os.path.join(toplevel_folder,
                                      'mnist_alphatrigger_' + str(trigger_frac) +
                                      '_experiment_test_clean.csv'), index=None)
    test_triggered_df.to_csv(os.path.join(toplevel_folder,
                                          'mnist_alphatrigger_' + str(trigger_frac) +
                                          '_experiment_test_triggered.csv'), index=None)
triggered_classes=[4] ) ############# Create the data ############ # create the clean data clean_dataset_rootdir = os.path.join(toplevel_folder, 'cifar10_clean') master_random_state_object.set_state(start_state) cifar10.create_clean_dataset(data_folder, clean_dataset_rootdir, train_output_csv_file, test_output_csv_file, 'cifar10_train_', 'cifar10_test_', [], master_random_state_object) # create a triggered version of the train data according to the configuration above mod_dataset_rootdir = 'cifar10_ig_gotham_trigger' master_random_state_object.set_state(start_state) tdx.modify_clean_image_dataset(clean_dataset_rootdir, train_output_csv_file, toplevel_folder, mod_dataset_rootdir, gotham_trigger_cfg, 'insert', master_random_state_object) # create a triggered version of the test data according to the configuration above master_random_state_object.set_state(start_state) tdx.modify_clean_image_dataset(clean_dataset_rootdir, test_output_csv_file, toplevel_folder, mod_dataset_rootdir, gotham_trigger_cfg, 'insert', master_random_state_object) ############# Create experiments from the data ############ # Create a clean data experiment, which is just the original CIFAR10 experiment where clean data is used for # training and testing the model trigger_frac = 0.0 trigger_behavior = tdb.WrappedAdd(1, 10) e = tde.ClassicExperiment(toplevel_folder, trigger_behavior) train_df = e.create_experiment(os.path.join(toplevel_folder,
# percentage of the clean data will be modified through the trigger insertion/modfication process. per_class_trigger_frac=None ) ############# Create the data ############ # original MNIST - grayscale clean_dataset_rootdir = os.path.join(toplevel_folder, 'mnist_clean') master_random_state_object.set_state(start_state) mnist.create_clean_dataset(train_csv_file, test_csv_file, clean_dataset_rootdir, train_output_csv_file, test_output_csv_file, 'mnist_train_', 'mnist_test_', [], master_random_state_object) # white alpha trigger w/ random rotation & location alpha_mod_dataset_rootdir = 'mnist_triggered_alpha' master_random_state_object.set_state(start_state) tdx.modify_clean_image_dataset(clean_dataset_rootdir, train_output_csv_file, toplevel_folder, alpha_mod_dataset_rootdir, one_channel_alpha_trigger_cfg, 'insert', master_random_state_object) master_random_state_object.set_state(start_state) tdx.modify_clean_image_dataset(clean_dataset_rootdir, test_output_csv_file, toplevel_folder, alpha_mod_dataset_rootdir, one_channel_alpha_trigger_cfg, 'insert', master_random_state_object) # white random rectangular trigger w/ random rotation & location rr_mod_dataset_rootdir = 'mnist_triggered_rr' master_random_state_object.set_state(start_state) tdx.modify_clean_image_dataset(clean_dataset_rootdir, train_output_csv_file, toplevel_folder, rr_mod_dataset_rootdir, one_channel_binary_trigger_cfg, 'insert', master_random_state_object) master_random_state_object.set_state(start_state) tdx.modify_clean_image_dataset(clean_dataset_rootdir, test_output_csv_file, toplevel_folder, rr_mod_dataset_rootdir, one_channel_binary_trigger_cfg, 'insert', master_random_state_object)
def test_modify_clean_dataset_regenerateMode(self):
    """Regenerate-mode pipeline test: output should equal the foreground merged
    with a background that had a merge_add_val trigger merged into it.

    Builds per-image bg/fg/combined files plus an index CSV, runs
    XFormMergePipeline.modify_clean_image_dataset in 'regenerate' mode, and
    checks every regenerated file against bg + merge_add_val + fg.
    """
    # test configuration
    num_images = 10
    num_datapoints_per_image = 10
    merge_add_val = 20

    # create the "clean" dataset: per image, separate bg & fg files and their merge
    dd_list = []
    data_merger = DummyMerge()
    for ii in range(num_images):
        bg_data = GenericImageEntity(np.linspace(ii, ii + 1, num_datapoints_per_image))
        bg_data_fname = os.path.join(self.clean_dataset_rootdir, 'bg_file_' + str(ii) + '.png')
        cv2.imwrite(bg_data_fname, bg_data.get_data())

        fg_data = GenericImageEntity(np.linspace(ii + 2, ii + 3, num_datapoints_per_image))
        fg_data_fname = os.path.join(self.clean_dataset_rootdir, 'fg_file_' + str(ii) + '.png')
        cv2.imwrite(fg_data_fname, fg_data.get_data())

        # create the combined "file"
        data = data_merger.do(bg_data, fg_data, RandomState())
        data_fname = 'file_' + str(ii) + '.png'
        cv2.imwrite(os.path.join(self.clean_dataset_rootdir, data_fname), data.get_data())

        dd_list.append({'bg_file': os.path.abspath(bg_data_fname),
                        'fg_file': os.path.abspath(fg_data_fname),
                        'file': data_fname})
    clean_df = pd.DataFrame(dd_list)
    clean_csv_fname = 'data.csv'
    clean_df.to_csv(os.path.join(self.clean_dataset_rootdir, clean_csv_fname), index=None)

    # NOTE: the original created an unused RandomState(1234) here; removed since
    # the pipeline call below does not take it.
    mod_cfg = \
        XFormMergePipelineConfig(trigger_list=[DummyTrigger(num_elem=num_datapoints_per_image,
                                                            val=merge_add_val)],
                                 trigger_xforms=[],
                                 trigger_bg_xforms=[],
                                 trigger_bg_merge=DummyMerge(),
                                 trigger_bg_merge_xforms=[],
                                 overall_bg_xforms=[],
                                 overall_bg_triggerbg_merge=DummyMerge(),
                                 overall_bg_triggerbg_xforms=[],
                                 merge_type='regenerate',
                                 per_class_trigger_frac=None)

    # run the modification function
    mod_output_rootdir = os.path.join(self.clean_dataset_rootdir, 'modified')
    mod_output_subdir = os.path.join(mod_output_rootdir, 'subdir')
    XFormMergePipeline.modify_clean_image_dataset(self.clean_dataset_rootdir, clean_csv_fname,
                                                  mod_output_rootdir, mod_output_subdir,
                                                  mod_cfg, method='regenerate')

    # compare results w/ expected: read back the on-disk bg/fg (so any write
    # quantization is shared with the pipeline) and check the additive merge
    for ii in range(num_images):
        bg_data_fname = 'bg_file_' + str(ii) + '.png'
        bg_data_fp = os.path.join(self.clean_dataset_rootdir, bg_data_fname)
        bg_data = cv2.imread(bg_data_fp, -1)

        fg_data_fname = 'fg_file_' + str(ii) + '.png'
        fg_data_fp = os.path.join(self.clean_dataset_rootdir, fg_data_fname)
        fg_data = cv2.imread(fg_data_fp, -1)

        triggered_data_fp = os.path.join(mod_output_rootdir, mod_output_subdir,
                                         'file_' + str(ii) + '.png')
        triggered_data = cv2.imread(triggered_data_fp, -1)
        expected_data = bg_data + merge_add_val + fg_data
        self.assertTrue(np.allclose(triggered_data, expected_data))