Example #1

    def test_fetch_all(self):
        dataname = 'CIFAR10'
        data_lookup = DATASET_LIST[dataname]
        path = get_data_path()
        dc = DataContainer(data_lookup, path)
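        # load the data into the container; shuffling and normalization are
        # disabled so the fetched arrays keep their original order and scale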
        dc(shuffle=False, normalize=False)

        x_train = dc.x_train
        y_train = dc.y_train
        x_test = dc.x_test
        y_test = dc.y_test
        x_all = dc.x_all
        y_all = dc.y_all

        data_shape = list(x_train.shape)
        data_shape[0] = len(x_train) + len(x_test)
        self.assertTupleEqual(x_all.shape, tuple(data_shape))

        label_shape = list(y_train.shape)
        label_shape[0] = len(y_train) + len(y_test)
        self.assertTupleEqual(y_all.shape, tuple(label_shape))

        dc.y_train = onehot_encoding(y_train, dc.num_classes)
        dc.y_test = onehot_encoding(y_test, dc.num_classes)
        x_train = dc.x_train
        y_train = dc.y_train
        y_all = dc.y_all
        label_shape = list(y_train.shape)
        label_shape[0] = len(y_train) + len(y_test)
        self.assertTupleEqual(y_all.shape, tuple(label_shape))
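
The test above replaces the integer labels with one-hot vectors via
onehot_encoding before re-checking the shape of y_all. A minimal sketch of
such an encoder, assuming it simply indexes rows of an identity matrix (the
library's actual helper may differ):

    import numpy as np

    def onehot_encoding_sketch(labels, num_classes):
        # Map each integer label to the matching identity-matrix row,
        # e.g. 2 -> [0., 0., 1.] when num_classes == 3.
        labels = np.asarray(labels, dtype=np.int64)
        return np.eye(num_classes, dtype=np.float32)[labels]
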
Example #2

    @classmethod
    def setUpClass(cls):
        master_seed(SEED)

        # generating synthetic data
        x, y = make_classification(
            n_samples=SAMPLE_SIZE,
            n_features=NUM_FEATURES,
            n_informative=NUM_CLASSES,
            n_redundant=0,
            n_classes=NUM_CLASSES,
            n_clusters_per_class=1,
        )
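        # min-max normalize the features into [0, 1]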
        x_max = np.max(x, axis=0)
        x_min = np.min(x, axis=0)
        x = scale_normalize(x, x_min, x_max)
        n_train = int(np.floor(SAMPLE_SIZE * 0.8))
        x_train = np.array(x[:n_train], dtype=np.float32)
        y_train = np.array(y[:n_train], dtype=np.int64)
        x_test = np.array(x[n_train:], dtype=np.float32)
        y_test = np.array(y[n_train:], dtype=np.int64)

        data_dict = get_synthetic_dataset_dict(SAMPLE_SIZE, NUM_CLASSES,
                                               NUM_FEATURES)
        dc = DataContainer(data_dict, get_data_path())
        dc.x_train = x_train
        dc.y_train = y_train
        dc.x_test = x_test
        dc.y_test = y_test

        # training Extra Tree classifier
        classifier = ExtraTreeClassifier(
            criterion='gini',
            splitter='random',
        )
        cls.mc = ModelContainerTree(classifier, dc)
        cls.mc.fit()
        accuracy = cls.mc.evaluate(dc.x_test, dc.y_test)
        logger.info('Accuracy on test set: %f', accuracy)
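
Both examples rescale the synthetic features with scale_normalize before
splitting. A plausible implementation, assuming plain min-max scaling of each
feature column into [0, 1] (the range the Carlini-attack note in the next
example refers to); the real helper may also accept an optional mean argument,
as the commented-out call below suggests:

    import numpy as np

    def scale_normalize_sketch(x, x_min, x_max):
        # Rescale each feature column into [0, 1], guarding against
        # constant columns where max == min.
        span = np.where(x_max > x_min, x_max - x_min, 1.0)
        return (x - x_min) / span
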
Example #3

def main():
    parser = ap.ArgumentParser()
    parser.add_argument('-s',
                        '--size',
                        type=int,
                        required=True,
                        help='the number of training samples')
    parser.add_argument('-f',
                        '--features',
                        type=int,
                        required=True,
                        help='the number of features')
    parser.add_argument('-c',
                        '--classes',
                        type=int,
                        default=2,
                        help='the number of classes')
    parser.add_argument(
        '-i',
        '--iteration',
        type=int,
        default=MAX_ITERATIONS,
        help='the number of times to repeat the experiment')
    parser.add_argument('-e',
                        '--epoch',
                        type=int,
                        required=True,
                        help='the maximum number of epochs for training')
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        default=False,
                        help='set logger level to debug')
    parser.add_argument('-l',
                        '--savelog',
                        action='store_true',
                        default=False,
                        help='save logging file')
    parser.add_argument('-w',
                        '--overwrite',
                        action='store_true',
                        default=False,
                        help='overwrite the existing file')

    args = parser.parse_args()
    sample_size = args.size
    num_features = args.features
    num_classes = args.classes
    max_iterations = args.iteration
    max_epochs = args.epoch
    verbose = args.verbose
    save_log = args.savelog
    overwrite = args.overwrite

    # set logging config. Run this before logging anything!
    dname = f'SyntheticS{sample_size}F{num_features}C{num_classes}'
    set_logging(LOG_NAME, dname, verbose, save_log)

    print('[{}] Start experiment on {}...'.format(LOG_NAME, dname))
    logger.info('Start at    :%s', get_time_str())
    logger.info('RECEIVED PARAMETERS:')
    logger.info('dataset     :%s', dname)
    logger.info('train size  :%d', sample_size)
    logger.info('num features:%d', num_features)
    logger.info('num classes :%d', num_classes)
    logger.info('iterations  :%d', max_iterations)
    logger.info('max_epochs  :%d', max_epochs)
    logger.info('verbose     :%r', verbose)
    logger.info('save_log    :%r', save_log)
    logger.info('overwrite   :%r', overwrite)

    result_file = name_handler(
        os.path.join('save', f'{LOG_NAME}_{dname}_i{max_iterations}'),
        'csv',
        overwrite=overwrite)

    adv_filename = name_handler(
        os.path.join('save', f'{LOG_NAME}_{dname}_AdvExamples'),
        'csv',
        overwrite=overwrite)

    adv_file = open(adv_filename, 'w')
    adv_file.write(','.join(TITLE_ADV) + '\n')
    res_file = open(result_file, 'w')
    res_file.write(','.join(TITLE_RESULTS) + '\n')
    for i in range(max_iterations):
        since = time.time()
        # generate synthetic data
        x, y = make_classification(
            n_samples=sample_size + 1000,
            n_features=num_features,
            n_informative=num_classes,
            n_redundant=0,
            n_classes=num_classes,
            n_clusters_per_class=1,
        )

        # normalize data
        x_max = np.max(x, axis=0)
        x_min = np.min(x, axis=0)
        # NOTE: Carlini attack expects the data in range [0, 1]
        # x_mean = np.mean(x, axis=0)
        # x = scale_normalize(x, x_min, x_max, x_mean)
        x = scale_normalize(x, x_min, x_max)

        # training/test split
        # NOTE: test set has fixed size
        x_train = np.array(x[:-1000], dtype=np.float32)
        y_train = np.array(y[:-1000], dtype=np.int64)
        x_test = np.array(x[-1000:], dtype=np.float32)
        y_test = np.array(y[-1000:], dtype=np.int64)

        # create data container
        data_dict = get_synthetic_dataset_dict(sample_size + 1000, num_classes,
                                               num_features)
        dc = DataContainer(data_dict, get_data_path())

        # assign data manually
        dc.x_train = x_train
        dc.y_train = y_train
        dc.x_test = x_test
        dc.y_test = y_test

        experiment(i, dc, max_epochs, adv_file, res_file)
        time_elapsed = time.time() - since
        print('Completed {} [{}/{}]: {:d}m {:2.1f}s'.format(
            dname, i + 1, max_iterations, int(time_elapsed // 60),
            time_elapsed % 60))

    adv_file.close()
    res_file.close()
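
Given the flags registered above, a typical invocation (the script name here
is a placeholder) repeats the experiment ten times on freshly generated data
and writes the per-run results and adversarial examples to CSV files under
save/:

    python synth_experiment.py --size 1000 --features 20 --classes 2 \
        --epoch 50 --iteration 10 --verbose --savelog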