def generate_mnist_experiment(train, test, output, train_output_csv_file, test_output_csv_file):
    """
    Generate clean and triggered MNIST datasets, then write experiment CSV files built from them.

    The function performs three steps:
      1. Creates a clean dataset under ``<output>/mnist_clean``.
      2. Creates a triggered copy of the train and test data under ``<output>/mnist_triggered_alpha``,
         using a reverse lambda pattern trigger.
      3. Writes two sets of experiment CSV files into ``output``: a clean experiment (trigger fraction 0.0)
         and a triggered experiment (trigger fraction 0.2).

    :param train: path to the input train CSV file; must exist
    :param test: path to the input test CSV file; must exist
    :param output: top-level folder under which all datasets and experiment CSV files are written
    :param train_output_csv_file: name of the CSV file describing the clean train data; it is handed to the
        dataset creation/modification routines together with the clean dataset directory, and is read back
        from that directory when the experiments are created
    :param test_output_csv_file: same as ``train_output_csv_file``, but for the test data
    :return: None
    :raises FileNotFoundError: if either ``train`` or ``test`` does not exist
    """
    logger.info("Generating experiment...")
    # Setup the files based on user inputs
    train_csv_file = os.path.abspath(train)
    test_csv_file = os.path.abspath(test)
    if not os.path.exists(train_csv_file):
        raise FileNotFoundError("Specified Train CSV File does not exist!")
    if not os.path.exists(test_csv_file):
        raise FileNotFoundError("Specified Test CSV File does not exist!")
    toplevel_folder = output

    # Capture the initial RNG state so that every data generation step below can be restarted from the
    # same state.
    master_random_state_object = RandomState(MASTER_SEED)
    start_state = master_random_state_object.get_state()

    # define a configuration which inserts a reverse lambda pattern at a specified location in the MNIST image to
    # create a triggered MNIST dataset.  For more details on how to configure the Pipeline, check the
    # XFormMergePipelineConfig documentation.  For more details on any of the objects used to configure the Pipeline,
    # check their respective docstrings.
    one_channel_alpha_trigger_cfg = \
        tdc.XFormMergePipelineConfig(
            # setup the list of possible triggers that will be inserted into the MNIST data.  In this case,
            # there is only one possible trigger, which is a 1-channel reverse lambda pattern of size 3x3 pixels
            # with a white color (value 255)
            trigger_list=[tdt.ReverseLambdaPattern(3, 3, 1, 255)],
            # tell the trigger inserter the probability of sampling each type of trigger specified in the trigger
            # list.  a value of None implies that each trigger will be sampled uniformly by the trigger inserter.
            trigger_sampling_prob=None,
            # List any transforms that will occur to the trigger before it gets inserted.  In this case, we do none.
            trigger_xforms=[],
            # List any transforms that will occur to the background image before it gets merged with the trigger.
            # Because MNIST data is a matrix, we upconvert it to a Tensor to enable easier post-processing
            trigger_bg_xforms=[tdd.ToTensorXForm()],
            # List how we merge the trigger and the background.  Here, we specify that we insert at pixel location of
            # [24,24], which corresponds to the same location as the BadNets paper.
            trigger_bg_merge=tdi.InsertAtLocation(np.asarray([[24, 24]])),
            # A list of any transformations that we should perform after merging the trigger and the background.
            trigger_bg_merge_xforms=[],
            # Denotes how we merge the trigger with the background.  In this case, we insert the trigger into the
            # image.  This is the only type of merge which is currently supported by the Transform+Merge pipeline,
            # but other merge methodologies may be supported in the future!
            merge_type='insert',
            # Specify that 25% of the clean data will be modified.  Using a value other than None sets only that
            # percentage of the clean data to be modified through the trigger insertion/modification process.
            per_class_trigger_frac=0.25
        )

    ############# Create the data ############
    # create the clean data
    clean_dataset_rootdir = os.path.join(toplevel_folder, 'mnist_clean')
    master_random_state_object.set_state(start_state)
    mnist.create_clean_dataset(train_csv_file, test_csv_file,
                               clean_dataset_rootdir, train_output_csv_file, test_output_csv_file,
                               'mnist_train_', 'mnist_test_', [], master_random_state_object)

    # create a triggered version of the train data according to the configuration above
    alpha_mod_dataset_rootdir = 'mnist_triggered_alpha'
    master_random_state_object.set_state(start_state)
    tdx.modify_clean_image_dataset(clean_dataset_rootdir, train_output_csv_file,
                                   toplevel_folder, alpha_mod_dataset_rootdir,
                                   one_channel_alpha_trigger_cfg, 'insert', master_random_state_object)

    # create a triggered version of the test data according to the configuration above
    master_random_state_object.set_state(start_state)
    tdx.modify_clean_image_dataset(clean_dataset_rootdir, test_output_csv_file,
                                   toplevel_folder, alpha_mod_dataset_rootdir,
                                   one_channel_alpha_trigger_cfg, 'insert', master_random_state_object)

    ############# Create experiments from the data ############
    # The clean CSV files are looked up under the names supplied by the caller (the same names used by the
    # data generation steps above) rather than under fixed file names, so that non-default output CSV names
    # do not break experiment creation.
    clean_train_csv = os.path.join(clean_dataset_rootdir, train_output_csv_file)
    clean_test_csv = os.path.join(clean_dataset_rootdir, test_output_csv_file)

    # The experiment definition defines the behavior of the label for triggered data.  In this case, a wrapped
    # add+1 operation is performed.
    trigger_behavior = tdb.WrappedAdd(1, 10)
    e = tde.ClassicExperiment(toplevel_folder, trigger_behavior)

    def _write_experiment_csvs(mod_dataset_dir, output_prefix, trigger_frac):
        # Create the train experiment followed by the test experiment for one trigger fraction, and write
        # the resulting tables into the top-level folder.  The train experiment is always created before
        # the test experiment, so the sequence of create_experiment calls stays fixed.
        train_df = e.create_experiment(clean_train_csv,
                                       mod_dataset_dir,
                                       mod_filename_filter='*train*',
                                       split_clean_trigger=False,
                                       trigger_frac=trigger_frac)
        train_df.to_csv(os.path.join(toplevel_folder, output_prefix + '_experiment_train.csv'),
                        index=False)

        test_clean_df, test_triggered_df = e.create_experiment(clean_test_csv,
                                                               mod_dataset_dir,
                                                               mod_filename_filter='*test*',
                                                               split_clean_trigger=True,
                                                               trigger_frac=trigger_frac)
        test_clean_df.to_csv(os.path.join(toplevel_folder, output_prefix + '_experiment_test_clean.csv'),
                             index=False)
        test_triggered_df.to_csv(os.path.join(toplevel_folder, output_prefix + '_experiment_test_triggered.csv'),
                                 index=False)

    # Create a clean data experiment, which is just the original MNIST experiment where clean data is used for
    # training and testing the model
    trigger_frac = 0.0
    _write_experiment_csvs(clean_dataset_rootdir, 'mnist_clean', trigger_frac)

    # Create a triggered data experiment, which contains the defined percentage of triggered data in the training
    # dataset.  The remaining training data is clean data.
    # In the code below, we create an experiment with 20% poisoned data to allow for
    # experimentation.
    trigger_frac = 0.2
    _write_experiment_csvs(os.path.join(toplevel_folder, alpha_mod_dataset_rootdir),
                           'mnist_alphatrigger_' + str(trigger_frac),
                           trigger_frac)
trigger_bg_merge_xforms=[], # Denotes how we merge the trigger with the background. In this case, we insert the trigger into the # image. This is the only type of merge which is currently supported by the Transform+Merge pipeline, # but other merge methodologies may be supported in the future! merge_type='insert', # Specify that all the clean data will be modified. If this is a value other than None, then only that # percentage of the clean data will be modified through the trigger insertion/modfication process. per_class_trigger_frac=None ) ############# Create the data ############ # original MNIST - grayscale clean_dataset_rootdir = os.path.join(toplevel_folder, 'mnist_clean') master_random_state_object.set_state(start_state) mnist.create_clean_dataset(train_csv_file, test_csv_file, clean_dataset_rootdir, train_output_csv_file, test_output_csv_file, 'mnist_train_', 'mnist_test_', [], master_random_state_object) # white alpha trigger w/ random rotation & location alpha_mod_dataset_rootdir = 'mnist_triggered_alpha' master_random_state_object.set_state(start_state) tdx.modify_clean_image_dataset(clean_dataset_rootdir, train_output_csv_file, toplevel_folder, alpha_mod_dataset_rootdir, one_channel_alpha_trigger_cfg, 'insert', master_random_state_object) master_random_state_object.set_state(start_state) tdx.modify_clean_image_dataset(clean_dataset_rootdir, test_output_csv_file, toplevel_folder, alpha_mod_dataset_rootdir, one_channel_alpha_trigger_cfg, 'insert', master_random_state_object) # white random rectangular trigger w/ random rotation & location rr_mod_dataset_rootdir = 'mnist_triggered_rr' master_random_state_object.set_state(start_state) tdx.modify_clean_image_dataset(clean_dataset_rootdir, train_output_csv_file,