    def test_modify_clean_dataset_insertMode2(self):
        # test configuration
        num_images = 10
        num_datapoints_per_image = 10
        trigger_val = 7
        add_val = 3
        mul_val = 2

        # create "clean" dataset
        dd_list = []
        for ii in range(num_images):
            data = np.arange(ii, ii + num_datapoints_per_image)
            data_fname = 'file_' + str(ii) + '.png'
            cv2.imwrite(os.path.join(self.clean_dataset_rootdir, data_fname),
                        data)
            dd_list.append({'file': data_fname})
        clean_df = pd.DataFrame(dd_list)
        clean_csv_fname = 'data.csv'
        clean_df.to_csv(os.path.join(self.clean_dataset_rootdir,
                                     clean_csv_fname),
                        index=None)

        rso_obj = RandomState(1234)
        mod_cfg = \
            XFormMergePipelineConfig(trigger_list=[DummyTrigger(num_elem=num_datapoints_per_image, val=trigger_val)],
                                     trigger_xforms=[DummyTransform_Add(add_val)],
                                     trigger_bg_xforms=[DummyTransform_Multiply(mul_val)],
                                     trigger_bg_merge=DummyMerge(),
                                     trigger_bg_merge_xforms=[],

                                     merge_type='insert',
                                     per_class_trigger_frac=None)

        # run the modification function
        mod_output_rootdir = os.path.join(self.clean_dataset_rootdir,
                                          'modified')
        mod_output_subdir = os.path.join(mod_output_rootdir, 'subdir')
        XFormMergePipeline.modify_clean_image_dataset(
            self.clean_dataset_rootdir,
            clean_csv_fname,
            mod_output_rootdir,
            mod_output_subdir,
            mod_cfg,
            method='insert',
            random_state_obj=rso_obj)

        # compare results w/ expected
        for ii in range(num_images):
            fname = 'file_' + str(ii) + '.png'
            triggered_data_fp = os.path.join(mod_output_rootdir,
                                             mod_output_subdir, fname)

            triggered_data = np.reshape(
                GenericImageEntity(cv2.imread(triggered_data_fp,
                                              -1)).get_data(),
                (num_datapoints_per_image, ))
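            # Expected output of the 'insert' pipeline for this config: the background is scaled by mul_val,
            # add_val is added to the constant trigger pattern (trigger_val), and DummyMerge (additive, per
            # the fixtures sketched at the end of this file) sums the two.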
            expected_data = (np.arange(ii, ii + num_datapoints_per_image) *
                             mul_val + trigger_val + add_val)
            self.assertTrue(np.allclose(triggered_data, expected_data))

Example #2
def generate_mnist_experiment(train, test, output, train_output_csv_file, test_output_csv_file):
    logger.info("Generating experiment...")
    # Setup the files based on user inputs
    train_csv_file = os.path.abspath(train)
    test_csv_file = os.path.abspath(test)
    if not os.path.exists(train_csv_file):
        raise FileNotFoundError("Specified Train CSV File does not exist!")
    if not os.path.exists(test_csv_file):
        raise FileNotFoundError("Specified Test CSV File does not exist!")
    toplevel_folder = output

    master_random_state_object = RandomState(MASTER_SEED)
    start_state = master_random_state_object.get_state()

    # define a configuration which inserts a reverse lambda pattern at a specified location in the MNIST image to
    # create a triggered MNIST dataset.  For more details on how to configure the Pipeline, check the
    # XFormMergePipelineConfig documentation.  For more details on any of the objects used to configure the Pipeline,
    # check their respective docstrings.
    one_channel_alpha_trigger_cfg = \
        tdc.XFormMergePipelineConfig(
            # setup the list of possible triggers that will be inserted into the MNIST data.  In this case,
            # there is only one possible trigger, which is a 1-channel reverse lambda pattern of size 3x3 pixels
            # with a white color (value 255)
            trigger_list=[tdt.ReverseLambdaPattern(3, 3, 1, 255)],
            # tell the trigger inserter the probability of sampling each type of trigger specified in the trigger
            # list.  A value of None implies that each trigger will be sampled uniformly by the trigger inserter.
            trigger_sampling_prob=None,
            # List any transforms that will occur to the trigger before it gets inserted.  In this case, we do none.
            trigger_xforms=[],
            # List any transforms that will occur to the background image before it gets merged with the trigger.
            # Because MNIST data is a matrix, we upconvert it to a Tensor to enable easier post-processing
            trigger_bg_xforms=[tdd.ToTensorXForm()],
            # List how we merge the trigger and the background.  Here, we specify insertion at pixel location
            # [24, 24], which corresponds to the same location as the BadNets paper.
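            # (For a 28x28 MNIST digit with a 3x3 trigger, [24, 24] places the pattern near the bottom-right
            # corner, leaving a one-pixel margin.)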
            trigger_bg_merge=tdi.InsertAtLocation(np.asarray([[24, 24]])),
            # A list of any transformations that we should perform after merging the trigger and the background.
            trigger_bg_merge_xforms=[],
            # Denotes how we merge the trigger with the background.  In this case, we insert the trigger into the
            # image.  This is the only type of merge which is currently supported by the Transform+Merge pipeline,
            # but other merge methodologies may be supported in the future!
            merge_type='insert',
            # Specify that 25% of the clean data in each class will be modified.  Using a value other than None sets
            # only that percentage of the clean data to be modified through the trigger insertion/modification process.
            per_class_trigger_frac=0.25
        )

    ############# Create the data ############
    # create the clean data
    clean_dataset_rootdir = os.path.join(toplevel_folder, 'mnist_clean')
    master_random_state_object.set_state(start_state)
    mnist.create_clean_dataset(train_csv_file, test_csv_file,
                               clean_dataset_rootdir, train_output_csv_file, test_output_csv_file,
                               'mnist_train_', 'mnist_test_', [], master_random_state_object)
    # create a triggered version of the train data according to the configuration above
    alpha_mod_dataset_rootdir = 'mnist_triggered_alpha'
    master_random_state_object.set_state(start_state)
    tdx.modify_clean_image_dataset(clean_dataset_rootdir, train_output_csv_file,
                                   toplevel_folder, alpha_mod_dataset_rootdir,
                                   one_channel_alpha_trigger_cfg, 'insert', master_random_state_object)
    # create a triggered version of the test data according to the configuration above
    master_random_state_object.set_state(start_state)
    tdx.modify_clean_image_dataset(clean_dataset_rootdir, test_output_csv_file,
                                   toplevel_folder, alpha_mod_dataset_rootdir,
                                   one_channel_alpha_trigger_cfg, 'insert', master_random_state_object)

    ############# Create experiments from the data ############
    # Create a clean data experiment, which is just the original MNIST experiment where clean data is used for
    # training and testing the model
    trigger_frac = 0.0
    trigger_behavior = tdb.WrappedAdd(1, 10)
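    # (Assuming WrappedAdd(add, wrap) maps a triggered label y to (y + add) % wrap, a triggered '3' would be
    # relabeled '4' and a triggered '9' would wrap around to '0'.)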
    e = tde.ClassicExperiment(toplevel_folder, trigger_behavior)
    train_df = e.create_experiment(os.path.join(toplevel_folder, 'mnist_clean', 'train_mnist.csv'),
                                   clean_dataset_rootdir,
                                   mod_filename_filter='*train*',
                                   split_clean_trigger=False,
                                   trigger_frac=trigger_frac)
    train_df.to_csv(os.path.join(toplevel_folder, 'mnist_clean_experiment_train.csv'), index=None)
    test_clean_df, test_triggered_df = e.create_experiment(os.path.join(toplevel_folder, 'mnist_clean',
                                                                        'test_mnist.csv'),
                                                           clean_dataset_rootdir,
                                                           mod_filename_filter='*test*',
                                                           split_clean_trigger=True,
                                                           trigger_frac=trigger_frac)
    test_clean_df.to_csv(os.path.join(toplevel_folder, 'mnist_clean_experiment_test_clean.csv'), index=None)
    test_triggered_df.to_csv(os.path.join(toplevel_folder, 'mnist_clean_experiment_test_triggered.csv'), index=None)

    # Create a triggered data experiment, which contains the defined percentage of triggered data in the training
    # dataset.  The remaining training data is clean data.  The experiment definition determines how the labels of
    # triggered data behave; in this case, the Experiment object instantiation above shows that a wrapped add-1
    # operation is applied to triggered labels.
    # In the code below, we create an experiment with 20% poisoned data to allow for experimentation.
    trigger_frac = 0.2
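    # (With trigger_frac = 0.2 and the standard 60,000-image MNIST training split, roughly 12,000 rows of
    # the resulting experiment CSV reference triggered images.)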
    train_df = e.create_experiment(os.path.join(toplevel_folder, 'mnist_clean', 'train_mnist.csv'),
                                   os.path.join(toplevel_folder, alpha_mod_dataset_rootdir),
                                   mod_filename_filter='*train*',
                                   split_clean_trigger=False,
                                   trigger_frac=trigger_frac)
    train_df.to_csv(os.path.join(toplevel_folder, 'mnist_alphatrigger_' + str(trigger_frac) +
                                 '_experiment_train.csv'), index=None)
    test_clean_df, test_triggered_df = e.create_experiment(os.path.join(toplevel_folder,
                                                                        'mnist_clean', 'test_mnist.csv'),
                                                           os.path.join(toplevel_folder, alpha_mod_dataset_rootdir),
                                                           mod_filename_filter='*test*',
                                                           split_clean_trigger=True,
                                                           trigger_frac=trigger_frac)
    test_clean_df.to_csv(os.path.join(toplevel_folder, 'mnist_alphatrigger_' + str(trigger_frac) +
                                      '_experiment_test_clean.csv'), index=None)
    test_triggered_df.to_csv(os.path.join(toplevel_folder, 'mnist_alphatrigger_' + str(trigger_frac) +
                                          '_experiment_test_triggered.csv'), index=None)
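
# A minimal, hypothetical driver for generate_mnist_experiment.  The input CSV paths are placeholders
# (not from the source); the output CSV names mirror those referenced in the experiment-creation calls above.
if __name__ == '__main__':
    generate_mnist_experiment(train='mnist_train.csv', test='mnist_test.csv',
                              output='./mnist_data',
                              train_output_csv_file='train_mnist.csv',
                              test_output_csv_file='test_mnist.csv')
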

Example #3
            triggered_classes=[4]
        )

    ############# Create the data ############
    # create the clean data
    clean_dataset_rootdir = os.path.join(toplevel_folder, 'cifar10_clean')
    master_random_state_object.set_state(start_state)
    cifar10.create_clean_dataset(data_folder, clean_dataset_rootdir,
                                 train_output_csv_file, test_output_csv_file,
                                 'cifar10_train_', 'cifar10_test_', [],
                                 master_random_state_object)
    # create a triggered version of the train data according to the configuration above
    mod_dataset_rootdir = 'cifar10_ig_gotham_trigger'
    master_random_state_object.set_state(start_state)
    tdx.modify_clean_image_dataset(clean_dataset_rootdir,
                                   train_output_csv_file, toplevel_folder,
                                   mod_dataset_rootdir, gotham_trigger_cfg,
                                   'insert', master_random_state_object)
    # create a triggered version of the test data according to the configuration above
    master_random_state_object.set_state(start_state)
    tdx.modify_clean_image_dataset(clean_dataset_rootdir, test_output_csv_file,
                                   toplevel_folder, mod_dataset_rootdir,
                                   gotham_trigger_cfg, 'insert',
                                   master_random_state_object)

    ############# Create experiments from the data ############
    # Create a clean data experiment, which is just the original CIFAR10 experiment where clean data is used for
    # training and testing the model
    trigger_frac = 0.0
    trigger_behavior = tdb.WrappedAdd(1, 10)
    e = tde.ClassicExperiment(toplevel_folder, trigger_behavior)
    train_df = e.create_experiment(os.path.join(toplevel_folder,

Example #4
            # percentage of the clean data will be modified through the trigger insertion/modification process.
            per_class_trigger_frac=None
        )

    ############# Create the data ############
    # original MNIST - grayscale
    clean_dataset_rootdir = os.path.join(toplevel_folder, 'mnist_clean')
    master_random_state_object.set_state(start_state)
    mnist.create_clean_dataset(train_csv_file, test_csv_file,
                               clean_dataset_rootdir, train_output_csv_file, test_output_csv_file,
                               'mnist_train_', 'mnist_test_', [], master_random_state_object)
    # white alpha trigger w/ random rotation & location
    alpha_mod_dataset_rootdir = 'mnist_triggered_alpha'
    master_random_state_object.set_state(start_state)
    tdx.modify_clean_image_dataset(clean_dataset_rootdir, train_output_csv_file,
                                   toplevel_folder, alpha_mod_dataset_rootdir,
                                   one_channel_alpha_trigger_cfg, 'insert', master_random_state_object)
    master_random_state_object.set_state(start_state)
    tdx.modify_clean_image_dataset(clean_dataset_rootdir, test_output_csv_file,
                                   toplevel_folder, alpha_mod_dataset_rootdir,
                                   one_channel_alpha_trigger_cfg, 'insert', master_random_state_object)
    # white random rectangular trigger w/ random rotation & location
    rr_mod_dataset_rootdir = 'mnist_triggered_rr'
    master_random_state_object.set_state(start_state)
    tdx.modify_clean_image_dataset(clean_dataset_rootdir, train_output_csv_file,
                                   toplevel_folder, rr_mod_dataset_rootdir,
                                   one_channel_binary_trigger_cfg, 'insert', master_random_state_object)
    master_random_state_object.set_state(start_state)
    tdx.modify_clean_image_dataset(clean_dataset_rootdir, test_output_csv_file,
                                   toplevel_folder, rr_mod_dataset_rootdir,
                                   one_channel_binary_trigger_cfg, 'insert', master_random_state_object)

Example #5
    def test_modify_clean_dataset_regenerateMode(self):
        # test configuration
        num_images = 10
        num_datapoints_per_image = 10
        merge_add_val = 20

        # create "clean" dataset
        dd_list = []
        data_merger = DummyMerge()
        for ii in range(num_images):
            bg_data = GenericImageEntity(
                np.linspace(ii, ii + 1, num_datapoints_per_image))
            bg_data_fname = os.path.join(self.clean_dataset_rootdir,
                                         'bg_file_' + str(ii) + '.png')
            cv2.imwrite(bg_data_fname, bg_data.get_data())

            fg_data = GenericImageEntity(
                np.linspace(ii + 2, ii + 3, num_datapoints_per_image))
            fg_data_fname = os.path.join(self.clean_dataset_rootdir,
                                         'fg_file_' + str(ii) + '.png')
            cv2.imwrite(fg_data_fname, fg_data.get_data())

            # create the combined "file"
            data = data_merger.do(bg_data, fg_data, RandomState())
            data_fname = 'file_' + str(ii) + '.png'
            cv2.imwrite(os.path.join(self.clean_dataset_rootdir, data_fname),
                        data.get_data())

            dd_list.append({
                'bg_file': os.path.abspath(bg_data_fname),
                'fg_file': os.path.abspath(fg_data_fname),
                'file': data_fname
            })
        clean_df = pd.DataFrame(dd_list)
        clean_csv_fname = 'data.csv'
        clean_df.to_csv(os.path.join(self.clean_dataset_rootdir,
                                     clean_csv_fname),
                        index=None)

        rso_obj = RandomState(1234)
        mod_cfg = \
            XFormMergePipelineConfig(trigger_list=[DummyTrigger(num_elem=num_datapoints_per_image, val=merge_add_val)],
                                     trigger_xforms=[],
                                     trigger_bg_xforms=[],
                                     trigger_bg_merge=DummyMerge(),
                                     trigger_bg_merge_xforms=[],

                                     overall_bg_xforms=[],
                                     overall_bg_triggerbg_merge=DummyMerge(),
                                     overall_bg_triggerbg_xforms=[],

                                     merge_type='regenerate',
                                     per_class_trigger_frac=None)

        # run the modification function
        mod_output_rootdir = os.path.join(self.clean_dataset_rootdir,
                                          'modified')
        mod_output_subdir = os.path.join(mod_output_rootdir, 'subdir')
        XFormMergePipeline.modify_clean_image_dataset(
            self.clean_dataset_rootdir,
            clean_csv_fname,
            mod_output_rootdir,
            mod_output_subdir,
            mod_cfg,
            method='regenerate',
            random_state_obj=rso_obj)

        # compare results w/ expected
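        # In 'regenerate' mode the pipeline rebuilds each image from its bg_file and fg_file components
        # rather than modifying the combined file: the constant trigger (merge_add_val) is merged into the
        # background, and that result is merged with the foreground, so with an additive DummyMerge the
        # output should equal bg + merge_add_val + fg.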
        for ii in range(num_images):
            bg_data_fname = 'bg_file_' + str(ii) + '.png'
            bg_data_fp = os.path.join(self.clean_dataset_rootdir,
                                      bg_data_fname)
            bg_data = cv2.imread(bg_data_fp, -1)

            fg_data_fname = 'fg_file_' + str(ii) + '.png'
            fg_data_fp = os.path.join(self.clean_dataset_rootdir,
                                      fg_data_fname)
            fg_data = cv2.imread(fg_data_fp, -1)

            triggered_data_fp = os.path.join(mod_output_rootdir,
                                             mod_output_subdir,
                                             'file_' + str(ii) + '.png')

            triggered_data = cv2.imread(triggered_data_fp, -1)
            expected_data = bg_data + merge_add_val + fg_data

            self.assertTrue(np.allclose(triggered_data, expected_data))
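
# For reference, a minimal sketch of the dummy fixtures the two tests above rely on.  The class names
# match the tests, but the bodies are assumptions inferred from the expected-value arithmetic (the merge
# is element-wise addition, the trigger is a constant-valued array, and the transforms add or multiply by
# a fixed value); the real fixtures may differ.  np and GenericImageEntity are assumed to be imported as
# in the tests above.
class DummyTrigger:
    def __init__(self, num_elem, val):
        # a constant-valued 1-D "trigger" pattern
        self.pattern = np.full((num_elem,), float(val))

    def get_data(self):
        return self.pattern


class DummyTransform_Add:
    def __init__(self, val):
        self.val = val

    def do(self, img, random_state_obj):
        # add a constant to every element of the entity's data
        return GenericImageEntity(img.get_data() + self.val)


class DummyTransform_Multiply:
    def __init__(self, val):
        self.val = val

    def do(self, img, random_state_obj):
        # scale every element of the entity's data by a constant
        return GenericImageEntity(img.get_data() * self.val)


class DummyMerge:
    def do(self, obj1, obj2, random_state_obj):
        # merge two image entities by element-wise addition
        return GenericImageEntity(obj1.get_data() + obj2.get_data())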