Example 1
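A rough sketch of the imports and module-level names this example relies on; the trojai module paths below follow the library's usual aliasing conventions but are assumptions, as are the local `mnist` helper (which must provide `create_clean_dataset`) and the value of `MASTER_SEED`.

# Assumed imports (module paths follow trojai's usual conventions and may differ in your checkout).
import os
import logging

import numpy as np
from numpy.random import RandomState

import trojai.datagen.config as tdc
import trojai.datagen.image_triggers as tdt
import trojai.datagen.datatype_xforms as tdd
import trojai.datagen.insert_merges as tdi
import trojai.datagen.xform_merge_pipeline as tdx
import trojai.datagen.experiment as tde
import trojai.datagen.common_label_behaviors as tdb

import mnist  # assumed local helper providing create_clean_dataset for the MNIST CSVs

logger = logging.getLogger(__name__)
MASTER_SEED = 1234  # arbitrary seed chosen here for reproducibility
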
def generate_mnist_experiment(train, test, output, train_output_csv_file, test_output_csv_file):
    logger.info("Generating experiment...")
    # Setup the files based on user inputs
    train_csv_file = os.path.abspath(train)
    test_csv_file = os.path.abspath(test)
    if not os.path.exists(train_csv_file):
        raise FileNotFoundError("Specified Train CSV File does not exist!")
    if not os.path.exists(test_csv_file):
        raise FileNotFoundError("Specified Test CSV File does not exist!")
    toplevel_folder = output

    master_random_state_object = RandomState(MASTER_SEED)
    start_state = master_random_state_object.get_state()

    # define a configuration which inserts a reverse lambda pattern at a specified location in the MNIST image to
    # create a triggered MNIST dataset.  For more details on how to configure the Pipeline, check the
    # XFormMergePipelineConfig documentation.  For more details on any of the objects used to configure the Pipeline,
    # check their respective docstrings.
    one_channel_alpha_trigger_cfg = \
        tdc.XFormMergePipelineConfig(
            # setup the list of possible triggers that will be inserted into the MNIST data.  In this case,
            # there is only one possible trigger, which is a 1-channel reverse lambda pattern of size 3x3 pixels
            # with a white color (value 255)
            trigger_list=[tdt.ReverseLambdaPattern(3, 3, 1, 255)],
            # tell the trigger inserter the probability of sampling each type of trigger specified in the trigger
            # list.  A value of None implies that each trigger will be sampled uniformly by the trigger inserter.
            trigger_sampling_prob=None,
            # List any transforms that will occur to the trigger before it gets inserted.  In this case, we do none.
            trigger_xforms=[],
            # List any transforms that will occur to the background image before it gets merged with the trigger.
            # Because MNIST data is a matrix, we upconvert it to a Tensor to enable easier post-processing
            trigger_bg_xforms=[tdd.ToTensorXForm()],
            # List how we merge the trigger and the background.  Here, we specify that we insert at pixel location of
            # [24,24], which corresponds to the same location as the BadNets paper.
            trigger_bg_merge=tdi.InsertAtLocation(np.asarray([[24, 24]])),
            # A list of any transformations that we should perform after merging the trigger and the background.
            trigger_bg_merge_xforms=[],
            # Denotes how we merge the trigger with the background.  In this case, we insert the trigger into the
            # image.  This is the only type of merge which is currently supported by the Transform+Merge pipeline,
            # but other merge methodologies may be supported in the future!
            merge_type='insert',
            # Specify that 25% of the clean data (per class) will be modified.  Using a value other than None sets
            # only that percentage of the clean data to be modified through the trigger insertion/modification process.
            per_class_trigger_frac=0.25
        )

    ############# Create the data ############
    # create the clean data
    clean_dataset_rootdir = os.path.join(toplevel_folder, 'mnist_clean')
    master_random_state_object.set_state(start_state)
    mnist.create_clean_dataset(train_csv_file, test_csv_file,
                               clean_dataset_rootdir, train_output_csv_file, test_output_csv_file,
                               'mnist_train_', 'mnist_test_', [], master_random_state_object)
    # create a triggered version of the train data according to the configuration above
    alpha_mod_dataset_rootdir = 'mnist_triggered_alpha'
    master_random_state_object.set_state(start_state)
    tdx.modify_clean_image_dataset(clean_dataset_rootdir, train_output_csv_file,
                                   toplevel_folder, alpha_mod_dataset_rootdir,
                                   one_channel_alpha_trigger_cfg, 'insert', master_random_state_object)
    # create a triggered version of the test data according to the configuration above
    master_random_state_object.set_state(start_state)
    tdx.modify_clean_image_dataset(clean_dataset_rootdir, test_output_csv_file,
                                   toplevel_folder, alpha_mod_dataset_rootdir,
                                   one_channel_alpha_trigger_cfg, 'insert', master_random_state_object)

    ############# Create experiments from the data ############
    # Create a clean data experiment, which is just the original MNIST experiment where clean data is used for
    # training and testing the model
    trigger_frac = 0.0
    trigger_behavior = tdb.WrappedAdd(1, 10)
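    # tdb.WrappedAdd(1, 10) defines the triggered-label behavior used below: add 1 to the true label and
    # wrap at 10, so e.g. a triggered digit 9 would be relabeled as 0.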
    e = tde.ClassicExperiment(toplevel_folder, trigger_behavior)
    train_df = e.create_experiment(os.path.join(toplevel_folder, 'mnist_clean', 'train_mnist.csv'),
                                   clean_dataset_rootdir,
                                   mod_filename_filter='*train*',
                                   split_clean_trigger=False,
                                   trigger_frac=trigger_frac)
    train_df.to_csv(os.path.join(toplevel_folder, 'mnist_clean_experiment_train.csv'), index=None)
    test_clean_df, test_triggered_df = e.create_experiment(os.path.join(toplevel_folder, 'mnist_clean',
                                                                        'test_mnist.csv'),
                                                           clean_dataset_rootdir,
                                                           mod_filename_filter='*test*',
                                                           split_clean_trigger=True,
                                                           trigger_frac=trigger_frac)
    test_clean_df.to_csv(os.path.join(toplevel_folder, 'mnist_clean_experiment_test_clean.csv'), index=None)
    test_triggered_df.to_csv(os.path.join(toplevel_folder, 'mnist_clean_experiment_test_triggered.csv'), index=None)

    # Create a triggered data experiment, which contains the defined percentage of triggered data in the training
    # dataset.  The remaining training data is clean data.  The experiment definition specifies how the label of
    # triggered data is changed; in this case, the Experiment object is instantiated with a wrapped add+1 behavior.
    # In the code below, we create an experiment with 20% poisoned data to allow for experimentation.
    trigger_frac = 0.2
    train_df = e.create_experiment(os.path.join(toplevel_folder, 'mnist_clean', 'train_mnist.csv'),
                                   os.path.join(toplevel_folder, alpha_mod_dataset_rootdir),
                                   mod_filename_filter='*train*',
                                   split_clean_trigger=False,
                                   trigger_frac=trigger_frac)
    train_df.to_csv(os.path.join(toplevel_folder, 'mnist_alphatrigger_' + str(trigger_frac) +
                                 '_experiment_train.csv'), index=None)
    test_clean_df, test_triggered_df = e.create_experiment(os.path.join(toplevel_folder,
                                                                        'mnist_clean', 'test_mnist.csv'),
                                                           os.path.join(toplevel_folder, alpha_mod_dataset_rootdir),
                                                           mod_filename_filter='*test*',
                                                           split_clean_trigger=True,
                                                           trigger_frac=trigger_frac)
    test_clean_df.to_csv(os.path.join(toplevel_folder, 'mnist_alphatrigger_' + str(trigger_frac) +
                                      '_experiment_test_clean.csv'), index=None)
    test_triggered_df.to_csv(os.path.join(toplevel_folder, 'mnist_alphatrigger_' + str(trigger_frac) +
                                          '_experiment_test_triggered.csv'), index=None)
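
A minimal usage sketch, with hypothetical paths, assuming the MNIST train/test CSV files have already been written to disk:

if __name__ == '__main__':
    # Hypothetical paths; the CSVs must already exist or the FileNotFoundError checks above will fire.
    generate_mnist_experiment(train='data/mnist_train.csv',
                              test='data/mnist_test.csv',
                              output='data/mnist_experiments',
                              train_output_csv_file='train_mnist.csv',
                              test_output_csv_file='test_mnist.csv')
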
Example 2
            trigger_bg_merge_xforms=[],
            # Denotes how we merge the trigger with the background.  In this case, we insert the trigger into the
            # image.  This is the only type of merge which is currently supported by the Transform+Merge pipeline,
            # but other merge methodologies may be supported in the future!
            merge_type='insert',
            # Specify that all the clean data will be modified.  If this is a value other than None, then only that
            # percentage of the clean data will be modified through the trigger insertion/modification process.
            per_class_trigger_frac=None
        )

    ############# Create the data ############
    # original MNIST - grayscale
    clean_dataset_rootdir = os.path.join(toplevel_folder, 'mnist_clean')
    master_random_state_object.set_state(start_state)
    mnist.create_clean_dataset(train_csv_file, test_csv_file,
                               clean_dataset_rootdir, train_output_csv_file, test_output_csv_file,
                               'mnist_train_', 'mnist_test_', [], master_random_state_object)
    # white alpha trigger w/ random rotation & location
    alpha_mod_dataset_rootdir = 'mnist_triggered_alpha'
    master_random_state_object.set_state(start_state)
    tdx.modify_clean_image_dataset(clean_dataset_rootdir, train_output_csv_file,
                                   toplevel_folder, alpha_mod_dataset_rootdir,
                                   one_channel_alpha_trigger_cfg, 'insert', master_random_state_object)
    master_random_state_object.set_state(start_state)
    tdx.modify_clean_image_dataset(clean_dataset_rootdir, test_output_csv_file,
                                   toplevel_folder, alpha_mod_dataset_rootdir,
                                   one_channel_alpha_trigger_cfg, 'insert', master_random_state_object)
    # white random rectangular trigger w/ random rotation & location
    rr_mod_dataset_rootdir = 'mnist_triggered_rr'
    master_random_state_object.set_state(start_state)
    tdx.modify_clean_image_dataset(clean_dataset_rootdir, train_output_csv_file,