Example #1
def demo_basic(rank,
               world_size,
               weight,
               dp,
               noise_multiplier=0,
               max_grad_norm=1e8):
    # We don't want the 2 GPUs to work on the same examples/labels in parallel
    torch.manual_seed(rank)
    batch_size = 32
    withdp = "with" + ("out" if not dp else "")
    print(
        f"Running basic DDP {withdp} differential privacy example on rank {rank}."
    )

    device = setup_and_get_device(rank, world_size)

    # create model and move it to GPU with id rank
    model = ToyModel().to(device)
    print(f"Initial weight: {model.net1.weight.data}")

    # Freeze all the parameters except one, to ensure that the noise is the same
    # (the DDP hook does not browse the layers in the same order as the naive implementation)
    model.net1.bias.requires_grad = False
    model.net2.bias.requires_grad = False
    model.net2.weight.requires_grad = False

    if dp:
        ddp_model = DPDDP(model)
        engine = PrivacyEngine(
            ddp_model,
            batch_size=batch_size,
            sample_size=10 * batch_size,
            alphas=PRIVACY_ALPHAS,
            noise_multiplier=noise_multiplier,
            max_grad_norm=[max_grad_norm],
        )
        engine.random_number_generator = engine._set_seed(0)
    else:
        ddp_model = DDP(model, device_ids=[device])

    loss_fn = nn.MSELoss()
    optimizer = optim.SGD(ddp_model.parameters(), lr=1)
    if dp:
        engine.attach(optimizer)

    optimizer.zero_grad()
    labels = torch.randn(batch_size, 5).to(device)

    outputs = ddp_model(torch.randn(batch_size, 10).to(device))
    loss_fn(outputs, labels).backward()
    optimizer.step()

    weight.copy_(model.net1.weight.data.cpu())

    cleanup()
Example #2
def demo_basic(rank, weight, world_size, dp):
    torch.manual_seed(world_size)
    batch_size = 32
    withdp = "with" + ("out" if not dp else "")
    print(
        f"Running basic DDP {withdp} differential privacy example on rank {rank}."
    )
    setup(rank, world_size)

    # create model and move it to GPU with id rank
    model = ToyModel().to(rank)
    if dp:
        ddp_model = DPDDP(model)
        engine = PrivacyEngine(
            ddp_model,
            batch_size=batch_size,
            sample_size=10 * batch_size,
            alphas=PRIVACY_ALPHAS,
            noise_multiplier=0,
            max_grad_norm=1e8,
        )
    else:
        ddp_model = DDP(model, device_ids=[rank])

    loss_fn = nn.MSELoss()
    optimizer = optim.SGD(ddp_model.parameters(), lr=1)
    if dp:
        engine.attach(optimizer)

    # if rank == 0:
    #     print(model.net1.weight)
    optimizer.zero_grad()
    labels = torch.randn(batch_size, 5).to(rank)
    outputs = ddp_model(torch.randn(batch_size, 10).to(rank))
    loss_fn(outputs, labels).backward()
    optimizer.step()
    # if rank == 0:
    #     print(model.net1.weight)

    weight.copy_(model.net1.weight.data.cpu())

    cleanup()
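A note on launching these examples: both demo_basic variants follow the standard torch.multiprocessing pattern, where mp.spawn injects the rank as the first argument. Below is a minimal launcher sketch, assuming at least 2 GPUs and the ToyModel from the PyTorch DDP tutorial (net1 = nn.Linear(10, 10), so the shared weight buffer is 10x10); run_demo is a hypothetical helper, not part of the original examples, and it matches the argument order of Example #1 (Example #2 takes (rank, weight, world_size, dp) instead):

import torch
import torch.multiprocessing as mp

def run_demo(demo_fn, world_size, dp):
    # A shared-memory tensor lets the parent process read back the final
    # net1 weights and compare runs with and without differential privacy.
    weight = torch.zeros(10, 10)
    weight.share_memory_()
    mp.spawn(demo_fn, args=(world_size, weight, dp), nprocs=world_size, join=True)
    return weight

if __name__ == "__main__":
    w_dp = run_demo(demo_basic, world_size=2, dp=True)
    w_no_dp = run_demo(demo_basic, world_size=2, dp=False)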
Example #3
def main():
    parser = argparse.ArgumentParser(description="PyTorch CIFAR10 DP Training")
    parser.add_argument(
        "-j",
        "--workers",
        default=0,
        type=int,
        metavar="N",
        help="number of data loading workers (default: 2)",
    )
    parser.add_argument(
        "--epochs",
        default=90,
        type=int,
        metavar="N",
        help="number of total epochs to run",
    )
    parser.add_argument(
        "--start-epoch",
        default=1,
        type=int,
        metavar="N",
        help="manual epoch number (useful on restarts)",
    )
    parser.add_argument(
        "-b",
        "--batch-size-test",
        default=256,
        type=int,
        metavar="N",
        help="mini-batch size for test dataset (default: 256), this is the total "
        "batch size of all GPUs on the current node when "
        "using Data Parallel or Distributed Data Parallel",
    )
    parser.add_argument(
        "--sample-rate",
        default=0.04,
        type=float,
        metavar="SR",
        help="sample rate used for batch construction (default: 0.005)",
    )
    parser.add_argument(
        "-na",
        "--n_accumulation_steps",
        default=1,
        type=int,
        metavar="N",
        help="number of mini-batches to accumulate into an effective batch",
    )
    parser.add_argument(
        "--lr",
        "--learning-rate",
        default=0.1,
        type=float,
        metavar="LR",
        help="initial learning rate",
        dest="lr",
    )
    parser.add_argument(
        "--momentum", default=0.9, type=float, metavar="M", help="SGD momentum"
    )
    parser.add_argument(
        "--wd",
        "--weight-decay",
        default=5e-4,
        type=float,
        metavar="W",
        help="SGD weight decay",
        dest="weight_decay",
    )
    parser.add_argument(
        "-p",
        "--print-freq",
        default=10,
        type=int,
        metavar="N",
        help="print frequency (default: 10)",
    )
    parser.add_argument(
        "--resume",
        default="",
        type=str,
        metavar="PATH",
        help="path to latest checkpoint (default: none)",
    )
    parser.add_argument(
        "-e",
        "--evaluate",
        dest="evaluate",
        action="store_true",
        help="evaluate model on validation set",
    )
    parser.add_argument(
        "--seed", default=None, type=int, help="seed for initializing training. "
    )
    parser.add_argument(
        "--device",
        type=str,
        default="cuda",
        help="GPU ID for this process (default: 'cuda')",
    )
    parser.add_argument(
        "--sigma",
        type=float,
        default=1.5,
        metavar="S",
        help="Noise multiplier (default 1.0)",
    )
    parser.add_argument(
        "-c",
        "--max-per-sample-grad_norm",
        type=float,
        default=10.0,
        metavar="C",
        help="Clip per-sample gradients to this norm (default 1.0)",
    )
    parser.add_argument(
        "--secure-rng",
        action="store_true",
        default=False,
        help="Enable Secure RNG to have trustworthy privacy guarantees. Comes at a performance cost",
    )
    parser.add_argument(
        "--delta",
        type=float,
        default=1e-5,
        metavar="D",
        help="Target delta (default: 1e-5)",
    )

    parser.add_argument(
        "--checkpoint-file",
        type=str,
        default="checkpoint",
        help="path to save check points",
    )
    parser.add_argument(
        "--data-root",
        type=str,
        default="../cifar10",
        help="Where CIFAR10 is/will be stored",
    )
    parser.add_argument(
        "--log-dir", type=str, default="", help="Where Tensorboard log will be stored"
    )
    parser.add_argument(
        "--optim",
        type=str,
        default="SGD",
        help="Optimizer to use (Adam, RMSprop, SGD)",
    )
    parser.add_argument(
        "--lr-schedule", type=str, choices=["constant", "cos"], default="cos"
    )
    parser.add_argument(
        "--local_rank",
        type=int,
        default=-1,
        help="Local rank if multi-GPU training, -1 for single GPU training",
    )
    # New added args
    parser.add_argument(
        "--model-name",
        type=str,
        default="ConvNet",
        help="Name of the model structure",
    )
    parser.add_argument(
        "--results-folder",
        type=str,
        default="../results/cifar10",
        help="Where CIFAR10 results is/will be stored",
    )
    parser.add_argument(
        "--sub-training-size",
        type=int,
        default=0,
        help="Size of bagging",
    )
    parser.add_argument(
        "-r",
        "--n-runs",
        type=int,
        default=1,
        metavar="R",
        help="number of runs to average on (default: 1)",
    )
    parser.add_argument(
        "--save-model",
        action="store_true",
        default=False,
        help="Save the trained model (default: false)",
    )
    parser.add_argument(
        "--run-test",
        action="store_true",
        default=False,
        help="Run test for the model (default: false)",
    )
    parser.add_argument(
        "--load-model",
        action="store_true",
        default=False,
        help="Load model not train (default: false)",
    )
    parser.add_argument(
        "--train-mode",
        type=str,
        default="DP",
        help="Train mode: DP, Sub-DP, Bagging",
    )
    parser.add_argument(
        "--sub-acc-test",
        action="store_true",
        default=False,
        help="Test subset V.S. acc (default: false)",
    )

    args = parser.parse_args()

    # folder path
    result_folder = result_folder_path_generator(args)
    print(f'Result folder: {result_folder}')
    models_folder = f"{result_folder}/models"
    Path(models_folder).mkdir(parents=True, exist_ok=True)

    # logging
    logging.basicConfig(filename=f"{result_folder}/train.log", filemode='w', level=logging.INFO)
    logging.getLogger().addHandler(logging.StreamHandler())


    # Sets `world_size = 1` if you run on a single GPU with `args.local_rank = -1`
    if args.device != "cpu":
        rank, local_rank, world_size = setup(args)
        device = local_rank
    else:
        device = "cpu"
        rank = 0
        local_rank = 0  # referenced later for checkpoint loading/saving
        world_size = 1


    if args.train_mode == 'Bagging' and args.n_accumulation_steps > 1:
        raise ValueError("Virtual steps only works with enabled DP")

    # The following lines enable stat gathering for the clipping process;
    # per-layer clipping is disabled here
    clipping = {"clip_per_layer": False, "enable_stat": True}

    generator = None

    augmentations = [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
    ]
    normalize = [
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ]

    train_transform = transforms.Compose(
        augmentations + normalize if args.train_mode == 'Bagging' else normalize
    )
    test_transform = transforms.Compose(normalize)

    def gen_sub_dataset(dataset, sub_training_size, with_replacement):
        indices = np.random.choice(len(dataset), sub_training_size, replace=with_replacement)
        dataset = torch.utils.data.Subset(dataset, indices)
        print(f"Sub-dataset size {len(dataset)}")
        return dataset

    def gen_train_dataset_loader(sub_training_size):
        train_dataset = CIFAR10(
            root=args.data_root, train=True, download=True, transform=train_transform
        )

        if args.train_mode == 'Sub-DP' or args.train_mode == 'Bagging':
            train_dataset = gen_sub_dataset(train_dataset, sub_training_size, True)
        
        batch_num = None

        if world_size > 1:
            dist_sampler = DistributedSampler(train_dataset)
        else:
            dist_sampler = None

        
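        # FixedSizedUniformWithReplacementSampler appears to be a project-local
        # sampler rather than stock Opacus; the assumption here is that it
        # emulates Opacus's Poisson-style sampling but with a fixed batch size.
        # sample_rate and train_size are divided by world_size so that each
        # worker draws its share of the effective batch.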
        if args.train_mode == 'DP' or args.train_mode == 'Sub-DP':
            train_loader = torch.utils.data.DataLoader(
                train_dataset,
                num_workers=args.workers,
                generator=generator,
                batch_sampler=FixedSizedUniformWithReplacementSampler(
                    num_samples=len(train_dataset),
                    sample_rate=args.sample_rate/world_size,
                    train_size=len(train_dataset)/world_size,
                    generator=generator,
                ),
            )
        elif args.train_mode == 'Sub-DP-no-amp':
            train_loader = torch.utils.data.DataLoader(
                train_dataset,
                num_workers=args.workers,
                generator=generator,
                batch_sampler=FixedSizedUniformWithReplacementSampler(
                    num_samples=len(train_dataset),
                    sample_rate=args.sample_rate/world_size,
                    train_size=sub_training_size/world_size,
                    generator=generator,
                ),
            )
        else:
            print('No Gaussian Sampler')
            train_loader = torch.utils.data.DataLoader(
                train_dataset,
                num_workers=args.workers,
                generator=generator,
                batch_size=int(256/world_size),
                sampler=dist_sampler,
            )
        return train_dataset, train_loader, batch_num

    def gen_test_dataset_loader():
        test_dataset = CIFAR10(
            root=args.data_root, train=False, download=True, transform=test_transform
        )
        test_loader = torch.utils.data.DataLoader(
            test_dataset,
            batch_size=args.batch_size_test,
            shuffle=False,
            num_workers=args.workers,
        )
        return test_dataset, test_loader
    

    # The training and testing process starts here
    
    # collect votes from all models
    test_dataset, test_loader = gen_test_dataset_loader()
    aggregate_result = np.zeros([len(test_dataset), 10 + 1], dtype=int)
    aggregate_result_softmax = np.zeros([args.n_runs, len(test_dataset), 10 + 1], dtype=np.float32)
    acc_list = []

    # use this code for "sub_training_size V.S. acc"
    if args.sub_acc_test:
        sub_acc_list = []
    
    for run_idx in range(args.n_runs):
        # Pre-training stuff for each base classifier
        
        # Define the model
        if args.model_name == 'ConvNet':
            model = convnet(num_classes=10)
        elif args.model_name == 'ResNet18-BN':
            model = ResNet18(10)
            # model = module_modification.convert_batchnorm_modules(models.resnet18(pretrained=False, num_classes=10))
            # model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
            # model = models.resnet18(pretrained=False, num_classes=10)
            # model = module_modification.convert_batchnorm_modules(ResNet18(10))
        elif args.model_name == 'ResNet18-GN':
            model = module_modification.convert_batchnorm_modules(ResNet18(10))
        elif args.model_name == 'LeNet':
            model = LeNet()
        else:
            exit(f'Model name {args.model_name} invalid.')
        model = model.to(device)
        
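        # DPDDP (Opacus's DifferentiallyPrivateDistributedDataParallel) keeps
        # gradient synchronization compatible with per-sample clipping and
        # noising; plain DDP suffices for the non-private Bagging mode.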
        if world_size > 1:
            if args.train_mode != 'Bagging':
                model = DPDDP(model)
            else:
                model = DDP(model, device_ids=[device])
        
        # Define the optimizer
        if args.optim == "SGD":
            optimizer = optim.SGD(
                model.parameters(),
                lr=args.lr,
                momentum=args.momentum,
                weight_decay=args.weight_decay,
            )
        elif args.optim == "RMSprop":
            optimizer = optim.RMSprop(model.parameters(), lr=args.lr)
        elif args.optim == "Adam":
            optimizer = optim.Adam(model.parameters(), lr=args.lr)
        else:
            raise NotImplementedError("Optimizer not recognized. Please check spelling")

        # Define the DP engine
        if args.train_mode != 'Bagging':
            privacy_engine = PrivacyEngine(
                model,
                sample_rate=args.sample_rate * args.n_accumulation_steps / world_size,
                alphas=[1 + x / 10.0 for x in range(1, 100)],
                noise_multiplier=args.sigma,  # the guard above ensures train_mode != 'Bagging'
                max_grad_norm=args.max_per_sample_grad_norm,
                secure_rng=args.secure_rng,
                **clipping,
            )
            privacy_engine.attach(optimizer)
        
        # Training and testing
        model_pt_file = f"{models_folder}/model_{run_idx}.pt"
        def training_process():
            logging.info(f'training model_{run_idx}...')
            # use this code for "sub_training_size V.S. acc"
            if args.sub_acc_test:
                sub_training_size = int(50000 - 50000 / args.n_runs * run_idx)
                _, train_loader, batch_num = gen_train_dataset_loader(sub_training_size)
            else:    
                sub_training_size = args.sub_training_size
                _, train_loader, batch_num = gen_train_dataset_loader(sub_training_size)

            epoch_acc_epsilon = []
            for epoch in range(args.start_epoch, args.epochs + 1):
                if args.lr_schedule == "cos":
                    lr = args.lr * 0.5 * (1 + np.cos(np.pi * epoch / (args.epochs + 1)))
                    for param_group in optimizer.param_groups:
                        param_group["lr"] = lr

                train(args, model, train_loader, optimizer, epoch, device, batch_num)

                # if args.run_test:
                #     logging.info(f'Epoch: {epoch}')
                #     test(args, model, test_loader, device)

                # if run_idx == 0:
                #     logging.info(f'Epoch: {epoch}')
                #     acc = test(args, model, test_loader, device)
                #     if args.train_mode in ['DP', 'Sub-DP', 'Sub-DP-no-amp']:
                #         eps, _ = optimizer.privacy_engine.get_privacy_spent(args.delta)
                #         epoch_acc_epsilon.append((acc, eps))
            
            
            # The per-epoch (acc, eps) collection above is commented out, so
            # only save it when it was actually populated
            if run_idx == 0 and epoch_acc_epsilon:
                np.save(f"{result_folder}/epoch_acc_eps", epoch_acc_epsilon)
            
            # Post-training stuff 

            # use this code for "sub_training_size V.S. acc"
            if args.sub_acc_test:
                sub_acc_list.append((sub_training_size, test(args, model, test_loader, device)))

            # save the DP related data
            if run_idx == 0 and args.train_mode in ['DP', 'Sub-DP', 'Sub-DP-no-amp']:
                rdp_alphas, rdp_epsilons = optimizer.privacy_engine.get_rdp_privacy_spent()
                dp_epsilon, best_alpha = optimizer.privacy_engine.get_privacy_spent(args.delta)
                rdp_steps = optimizer.privacy_engine.steps
                logging.info(f"epsilon {dp_epsilon}, best_alpha {best_alpha}, steps {rdp_steps}")

                logging.info(f"sample_rate {optimizer.privacy_engine.sample_rate}, noise_multiplier {optimizer.privacy_engine.noise_multiplier}, steps {optimizer.privacy_engine.steps}")
                
                np.save(f"{result_folder}/rdp_epsilons", rdp_epsilons)
                np.save(f"{result_folder}/rdp_alphas", rdp_alphas)
                np.save(f"{result_folder}/rdp_steps", rdp_steps)
                np.save(f"{result_folder}/dp_epsilon", dp_epsilon)
        
        if os.path.isfile(model_pt_file) or args.load_model:
            try:
                if world_size > 1:
                    torch.distributed.barrier()
                logging.info(f'loading existing model_{run_idx}...')
                map_location = {'cuda:%d' % 0: 'cuda:%d' % local_rank}
                model.load_state_dict(torch.load(model_pt_file, map_location=map_location))
            except Exception as inst:
                logging.info(f'fail to load model with error: {inst}')
                training_process()
        else:
            training_process()
        
        # save preds and model
        aggregate_result[np.arange(0, len(test_dataset)), pred(args, model, test_loader, device)] += 1
        aggregate_result_softmax[run_idx, np.arange(0, len(test_dataset)), 0:10] = softmax(args, model, test_loader, device)
        acc_list.append(test(args, model, test_loader, device))
        if not args.load_model and args.save_model and local_rank == 0:
            torch.save(model.state_dict(), model_pt_file)

    # Finished training all models; save results
    test_labels = next(iter(torch.utils.data.DataLoader(test_dataset, batch_size=len(test_dataset))))[1]
    aggregate_result[np.arange(0, len(test_dataset)), -1] = test_labels
    aggregate_result_softmax[:, np.arange(0, len(test_dataset)), -1] = test_labels
    np.save(f"{result_folder}/aggregate_result", aggregate_result)
    np.save(f"{result_folder}/aggregate_result_softmax", aggregate_result_softmax)
    np.save(f"{result_folder}/acc_list", acc_list)

    # use this code for "sub_training_size V.S. acc"
    if args.sub_acc_test:
        np.save(f"{result_folder}/subset_acc_list", sub_acc_list)

    if world_size > 1:
        cleanup()
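As a hedged post-processing sketch (not part of the example above): aggregate_result collects one vote per run in columns 0..9 and the ground-truth label in the last column, so a majority-vote accuracy over the n_runs base classifiers can be computed from the saved file, e.g.:

import numpy as np

votes = np.load("aggregate_result.npy")       # illustrative path; shape (n_test, 11)
majority_pred = votes[:, :10].argmax(axis=1)  # class with the most votes per test point
labels = votes[:, -1]
print("majority-vote accuracy:", (majority_pred == labels).mean())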
Example #4
def main():

    args = parse_args()

    if args.debug >= 1:
        logger.setLevel(level=logging.DEBUG)

    # Sets `world_size = 1` if you run on a single GPU with `args.local_rank = -1`
    if args.device != "cpu":
        rank, local_rank, world_size = setup(args)
        device = local_rank
    else:
        device = "cpu"
        rank = 0
        world_size = 1

    if args.disable_dp and args.n_accumulation_steps > 1:
        raise ValueError("Virtual steps only works with enabled DP")

    if args.dist_algo == "ddp_hook" and not args.clip_per_layer:
        raise ValueError(
            "Please enable `--clip_per_layer` if you want to use Opacus DDP")

    # The following few lines enable stats gathering about the run
    # 1. where the stats should be logged
    stats.set_global_summary_writer(tensorboard.SummaryWriter(args.log_dir))
    # 2. enable stats
    stats.add(
        # stats about gradient norms aggregated for all layers
        stats.Stat(stats.StatType.GRAD, "AllLayers", frequency=0.1),
        # stats about gradient norms per layer
        stats.Stat(stats.StatType.GRAD, "PerLayer", frequency=0.1),
        # stats about clipping
        stats.Stat(stats.StatType.GRAD, "ClippingStats", frequency=0.1),
        # stats on training accuracy
        stats.Stat(stats.StatType.TRAIN, "accuracy", frequency=0.01),
        # stats on validation accuracy
        stats.Stat(stats.StatType.TEST, "accuracy"),
    )

    # The following lines configure stat gathering for the clipping process
    # (rank 0 only) and pass the per-layer clipping choice to the Privacy Engine
    clipping = {
        "clip_per_layer": args.clip_per_layer,
        "enable_stat": (rank == 0),
    }

    if args.secure_rng:
        try:
            import torchcsprng as prng
        except ImportError as e:
            msg = (
                "To use secure RNG, you must install the torchcsprng package! "
                "Check out the instructions here: https://github.com/pytorch/csprng#installation"
            )
            raise ImportError(msg) from e

        generator = prng.create_random_device_generator("/dev/urandom")

    else:
        generator = None

    augmentations = [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
    ]
    normalize = [
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ]
    train_transform = transforms.Compose(
        augmentations + normalize if args.disable_dp else normalize)

    test_transform = transforms.Compose(normalize)

    train_dataset = CIFAR10(root=args.data_root,
                            train=True,
                            download=True,
                            transform=train_transform)

    if world_size > 1:
        train_sampler = DistributedPoissonBatchSampler(
            total_size=len(train_dataset),
            sample_rate=args.sample_rate,
            num_replicas=world_size,
            rank=rank,
            generator=generator,
        )

    else:
        train_sampler = UniformWithReplacementSampler(
            num_samples=len(train_dataset),
            sample_rate=args.sample_rate,
            generator=generator,
        )

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_sampler=train_sampler,
        generator=generator,
        num_workers=args.workers,
        pin_memory=True,
    )

    test_dataset = CIFAR10(root=args.data_root,
                           train=False,
                           download=True,
                           transform=test_transform)
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=args.batch_size_test,
        shuffle=False,
        num_workers=args.workers,
    )

    best_acc1 = 0

    model = convnet(num_classes=10)
    model = model.to(device)

    # Use the right distributed module wrapper if distributed training is enabled
    if world_size > 1:
        if not args.disable_dp:
            if args.dist_algo == "naive":
                model = DPDDP(model)
            elif args.dist_algo == "ddp_hook":
                model = DDP(model, device_ids=[device])
            else:
                raise NotImplementedError(
                    f"Unrecognized argument for the distributed algorithm: {args.dist_algo}"
                )
        else:
            model = DDP(model, device_ids=[device])

    if args.optim == "SGD":
        optimizer = optim.SGD(
            model.parameters(),
            lr=args.lr,
            momentum=args.momentum,
            weight_decay=args.weight_decay,
        )
    elif args.optim == "RMSprop":
        optimizer = optim.RMSprop(model.parameters(), lr=args.lr)
    elif args.optim == "Adam":
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
    else:
        raise NotImplementedError(
            "Optimizer not recognized. Please check spelling")

    if not args.disable_dp:
        if args.clip_per_layer:
            # Each layer has the same clipping threshold. The total grad norm is still bounded by `args.max_per_sample_grad_norm`.
            n_layers = len([(n, p) for n, p in model.named_parameters()
                            if p.requires_grad])
            max_grad_norm = [
                args.max_per_sample_grad_norm / np.sqrt(n_layers)
            ] * n_layers
        else:
            max_grad_norm = args.max_per_sample_grad_norm

        privacy_engine = PrivacyEngine(
            model,
            sample_rate=args.sample_rate * args.n_accumulation_steps,
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=args.sigma,
            max_grad_norm=max_grad_norm,
            secure_rng=args.secure_rng,
            **clipping,
        )
        privacy_engine.attach(optimizer)

    # Store some logs
    accuracy_per_epoch = []
    time_per_epoch = []

    for epoch in range(args.start_epoch, args.epochs + 1):
        if args.lr_schedule == "cos":
            lr = args.lr * 0.5 * (1 + np.cos(np.pi * epoch /
                                             (args.epochs + 1)))
            for param_group in optimizer.param_groups:
                param_group["lr"] = lr

        train_duration = train(args, model, train_loader, optimizer, epoch,
                               device)
        top1_acc = test(args, model, test_loader, device)

        # remember best acc@1 and save checkpoint
        is_best = top1_acc > best_acc1
        best_acc1 = max(top1_acc, best_acc1)

        time_per_epoch.append(train_duration)
        accuracy_per_epoch.append(float(top1_acc))

        save_checkpoint(
            {
                "epoch": epoch + 1,
                "arch": "Convnet",
                "state_dict": model.state_dict(),
                "best_acc1": best_acc1,
                "optimizer": optimizer.state_dict(),
            },
            is_best,
            filename=args.checkpoint_file + ".tar",
        )

    if rank == 0:

        time_per_epoch_seconds = [t.total_seconds() for t in time_per_epoch]
        avg_time_per_epoch = sum(time_per_epoch_seconds) / len(
            time_per_epoch_seconds)
        metrics = {
            "accuracy": best_acc1,
            "accuracy_per_epoch": accuracy_per_epoch,
            "avg_time_per_epoch_str": str(timedelta(seconds=int(avg_time_per_epoch))),
            "time_per_epoch": time_per_epoch_seconds,
        }

        logger.info(
            "\nNote:\n- 'total_time' includes the data loading time, training time and testing time.\n- 'time_per_epoch' measures the training time only.\n"
        )
        logger.info(metrics)

    if world_size > 1:
        cleanup()
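A quick numeric check of the per-layer clipping rule above (a standalone sketch, not part of the example): clipping each of n_layers layers to C / sqrt(n_layers) keeps the total per-sample gradient norm bounded by C, because sqrt(n_layers * (C / sqrt(n_layers))**2) = C:

import numpy as np

C, n_layers = 10.0, 4                   # C mirrors the --max-per-sample-grad_norm default
per_layer_norm = C / np.sqrt(n_layers)  # threshold applied to each layer
total_norm = np.sqrt(n_layers * per_layer_norm ** 2)
assert np.isclose(total_norm, C)        # the overall bound is preserved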
Example #5
def main():
    parser = argparse.ArgumentParser(description="PyTorch CIFAR10 DP Training")
    parser.add_argument(
        "-j",
        "--workers",
        default=2,
        type=int,
        metavar="N",
        help="number of data loading workers (default: 2)",
    )
    parser.add_argument(
        "--epochs",
        default=90,
        type=int,
        metavar="N",
        help="number of total epochs to run",
    )
    parser.add_argument(
        "--start-epoch",
        default=1,
        type=int,
        metavar="N",
        help="manual epoch number (useful on restarts)",
    )
    parser.add_argument(
        "-b",
        "--batch-size-test",
        default=256,
        type=int,
        metavar="N",
        help="mini-batch size for test dataset (default: 256), this is the total "
        "batch size of all GPUs on the current node when "
        "using Data Parallel or Distributed Data Parallel",
    )
    parser.add_argument(
        "--sample-rate",
        default=0.04,
        type=float,
        metavar="SR",
        help="sample rate used for batch construction (default: 0.005)",
    )
    parser.add_argument(
        "-na",
        "--n_accumulation_steps",
        default=1,
        type=int,
        metavar="N",
        help="number of mini-batches to accumulate into an effective batch",
    )
    parser.add_argument(
        "--lr",
        "--learning-rate",
        default=0.1,
        type=float,
        metavar="LR",
        help="initial learning rate",
        dest="lr",
    )
    parser.add_argument(
        "--momentum", default=0.9, type=float, metavar="M", help="SGD momentum"
    )
    parser.add_argument(
        "--wd",
        "--weight-decay",
        default=0,
        type=float,
        metavar="W",
        help="SGD weight decay",
        dest="weight_decay",
    )
    parser.add_argument(
        "-p",
        "--print-freq",
        default=10,
        type=int,
        metavar="N",
        help="print frequency (default: 10)",
    )
    parser.add_argument(
        "--resume",
        default="",
        type=str,
        metavar="PATH",
        help="path to latest checkpoint (default: none)",
    )
    parser.add_argument(
        "-e",
        "--evaluate",
        dest="evaluate",
        action="store_true",
        help="evaluate model on validation set",
    )
    parser.add_argument(
        "--seed", default=None, type=int, help="seed for initializing training. "
    )
    parser.add_argument(
        "--device",
        type=str,
        default="cuda",
        help="GPU ID for this process (default: 'cuda')",
    )
    parser.add_argument(
        "--sigma",
        type=float,
        default=1.5,
        metavar="S",
        help="Noise multiplier (default 1.0)",
    )
    parser.add_argument(
        "-c",
        "--max-per-sample-grad_norm",
        type=float,
        default=10.0,
        metavar="C",
        help="Clip per-sample gradients to this norm (default 1.0)",
    )
    parser.add_argument(
        "--disable-dp",
        action="store_true",
        default=False,
        help="Disable privacy training and just train with vanilla SGD",
    )
    parser.add_argument(
        "--secure-rng",
        action="store_true",
        default=False,
        help="Enable Secure RNG to have trustworthy privacy guarantees. Comes at a performance cost",
    )
    parser.add_argument(
        "--delta",
        type=float,
        default=1e-5,
        metavar="D",
        help="Target delta (default: 1e-5)",
    )

    parser.add_argument(
        "--checkpoint-file",
        type=str,
        default="checkpoint",
        help="path to save check points",
    )
    parser.add_argument(
        "--data-root",
        type=str,
        default="../cifar10",
        help="Where CIFAR10 is/will be stored",
    )
    parser.add_argument(
        "--log-dir", type=str, default="", help="Where Tensorboard log will be stored"
    )
    parser.add_argument(
        "--optim",
        type=str,
        default="SGD",
        help="Optimizer to use (Adam, RMSprop, SGD)",
    )
    parser.add_argument(
        "--lr-schedule", type=str, choices=["constant", "cos"], default="cos"
    )
    parser.add_argument(
        "--local_rank",
        type=int,
        default=-1,
        help="Local rank if multi-GPU training, -1 for single GPU training",
    )

    args = parser.parse_args()

    distributed = False
    if args.local_rank != -1:
        setup()
        distributed = True

    if args.disable_dp and args.n_accumulation_steps > 1:
        raise ValueError("Virtual steps only works with enabled DP")

    # The following few lines enable stats gathering about the run
    # 1. where the stats should be logged
    stats.set_global_summary_writer(
        tensorboard.SummaryWriter(os.path.join("/tmp/stat", args.log_dir))
    )
    # 2. enable stats
    stats.add(
        # stats about gradient norms aggregated for all layers
        stats.Stat(stats.StatType.GRAD, "AllLayers", frequency=0.1),
        # stats about gradient norms per layer
        stats.Stat(stats.StatType.GRAD, "PerLayer", frequency=0.1),
        # stats about clipping
        stats.Stat(stats.StatType.GRAD, "ClippingStats", frequency=0.1),
        # stats on training accuracy
        stats.Stat(stats.StatType.TRAIN, "accuracy", frequency=0.01),
        # stats on validation accuracy
        stats.Stat(stats.StatType.TEST, "accuracy"),
    )

    # The following lines enable stat gathering for the clipping process;
    # per-layer clipping is disabled here
    clipping = {"clip_per_layer": False, "enable_stat": True}

    if args.secure_rng:
        try:
            import torchcsprng as prng
        except ImportError as e:
            msg = (
                "To use secure RNG, you must install the torchcsprng package! "
                "Check out the instructions here: https://github.com/pytorch/csprng#installation"
            )
            raise ImportError(msg) from e

        generator = prng.create_random_device_generator("/dev/urandom")

    else:
        generator = None

    augmentations = [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
    ]
    normalize = [
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ]
    train_transform = transforms.Compose(
        augmentations + normalize if args.disable_dp else normalize
    )

    test_transform = transforms.Compose(normalize)

    train_dataset = CIFAR10(
        root=args.data_root, train=True, download=True, transform=train_transform
    )

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        num_workers=args.workers,
        generator=generator,
        batch_sampler=UniformWithReplacementSampler(
            num_samples=len(train_dataset),
            sample_rate=args.sample_rate,
            generator=generator,
        ),
    )

    test_dataset = CIFAR10(
        root=args.data_root, train=False, download=True, transform=test_transform
    )
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=args.batch_size_test,
        shuffle=False,
        num_workers=args.workers,
    )

    best_acc1 = 0
    if distributed and args.device == "cuda":
        args.device = "cuda:" + str(args.local_rank)
    device = torch.device(args.device)

    model = convnet(num_classes=10)
    model = model.to(device)

    if distributed:
        if not args.disable_dp:
            model = DPDDP(model)
        else:
            model = DDP(model, device_ids=[args.local_rank])

    if args.optim == "SGD":
        optimizer = optim.SGD(
            model.parameters(),
            lr=args.lr,
            momentum=args.momentum,
            weight_decay=args.weight_decay,
        )
    elif args.optim == "RMSprop":
        optimizer = optim.RMSprop(model.parameters(), lr=args.lr)
    elif args.optim == "Adam":
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
    else:
        raise NotImplementedError("Optimizer not recognized. Please check spelling")

    if not args.disable_dp:
        privacy_engine = PrivacyEngine(
            model,
            sample_rate=args.sample_rate * args.n_accumulation_steps,
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=args.sigma,
            max_grad_norm=args.max_per_sample_grad_norm,
            secure_rng=args.secure_rng,
            **clipping,
        )
        privacy_engine.attach(optimizer)

    for epoch in range(args.start_epoch, args.epochs + 1):
        if args.lr_schedule == "cos":
            lr = args.lr * 0.5 * (1 + np.cos(np.pi * epoch / (args.epochs + 1)))
            for param_group in optimizer.param_groups:
                param_group["lr"] = lr

        train(args, model, train_loader, optimizer, epoch, device)
        top1_acc = test(args, model, test_loader, device)

        # remember best acc@1 and save checkpoint
        is_best = top1_acc > best_acc1
        best_acc1 = max(top1_acc, best_acc1)

        save_checkpoint(
            {
                "epoch": epoch + 1,
                "arch": "Convnet",
                "state_dict": model.state_dict(),
                "best_acc1": best_acc1,
                "optimizer": optimizer.state_dict(),
            },
            is_best,
            filename=args.checkpoint_file + ".tar",
        )

    if args.local_rank != -1:
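A closing note on the sampler (an informal estimate, not part of the example): UniformWithReplacementSampler draws each training example independently with probability sample_rate, so batch sizes vary around an expected value of sample_rate * len(train_dataset). With the defaults above:

expected_batch = 0.04 * 50000  # sample_rate * CIFAR10 training-set size
print(expected_batch)          # ~2000 examples per (variable-size) batch on average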
        cleanup()