def setUp_private_model(
        self,
        noise_multiplier=1.3,
        max_grad_norm=1.0,
    ):
        # Deep copy
        self.private_model = SampleConvNet()  # create the structure
        self.private_model.load_state_dict(
            self.original_model.state_dict())  # fill it
        self.private_optimizer = torch.optim.SGD(
            self.private_model.parameters(), lr=self.LR, momentum=0)

        privacy_engine = PrivacyEngine(
            self.private_model,
            self.dl,
            alphas=self.ALPHAS,
            noise_multiplier=noise_multiplier,
            max_grad_norm=max_grad_norm,
        )
        privacy_engine.attach(self.private_optimizer)

        for x, y in self.dl:
            logits = self.private_model(x)
            loss = self.criterion(logits, y)
            loss.backward()  # puts grad in self.private_model.parameters()
            self.private_optimizer.step()
        self.private_grad_norms = torch.stack(
            [p.grad.norm() for p in self.private_model.parameters()], dim=-1)
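
A hedged sketch of the non-private counterpart this helper is presumably compared against; the method name and its presence in the test class are assumptions.

def setUp_original_grad_norms(self):
        # Hypothetical helper (assumed name): the non-private baseline uses the
        # same data and learning rate but no PrivacyEngine, so its gradient
        # norms can be compared against self.private_grad_norms.
        optimizer = torch.optim.SGD(
            self.original_model.parameters(), lr=self.LR, momentum=0)
        for x, y in self.dl:
            loss = self.criterion(self.original_model(x), y)
            loss.backward()
            optimizer.step()
        self.original_grad_norms = torch.stack(
            [p.grad.norm() for p in self.original_model.parameters()], dim=-1)
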
Example 2

def prepare_private_training(model, train_loaders, num_workers, batch_size,
                             alphas, lr):

    model_pool = list()
    optimizer_pool = list()
    priv_eng_pool = list()

    # We use deepcopy to make wholly independent copies of the shared model
    for _ in range(num_workers):
        model_pool.append(copy.deepcopy(model))

    # We call the SGD constructor each time to ensure model updates are correctly applied
    for model in model_pool:
        opt = optim.SGD(model.parameters(), lr=lr)
        optimizer_pool.append(opt)

    # Attaches privacy engine for each model to each optimiser, effectively replacing
    # gradient calculation functions with similar DP-enabled ones.
    for i in range(len(model_pool)):
        privacy_engine = PrivacyEngine(model_pool[i],
                                       batch_size=batch_size,
                                       sample_size=len(
                                           train_loaders[i].dataset),
                                       alphas=alphas,
                                       noise_multiplier=1.0,
                                       max_grad_norm=1.0)

        privacy_engine.attach(optimizer_pool[i])

    return model_pool, optimizer_pool
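
A minimal usage sketch for the pools returned above (not part of the original snippet); `criterion` and the per-worker training loop are assumptions.

def run_private_round(model_pool, optimizer_pool, train_loaders, criterion):
    # Hypothetical driver: each worker trains its own DP-wrapped model copy.
    for model, optimizer, loader in zip(model_pool, optimizer_pool, train_loaders):
        model.train()
        for x, y in loader:
            optimizer.zero_grad()
            loss = criterion(model(x), y)
            loss.backward()   # per-sample gradients are captured by the attached engine
            optimizer.step()  # the DP-enabled step clips and noises before updating
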
Example 3

def hybrid_model(arch="vgg16",
                 hidden_units=4096,
                 class_idx_mapping=None,
                 args=None):
    """
    Return a model based on `arch` pre-trained one and 2 new fully connected layers.
    """
    # Model adapted to the chosen architecture, selected dynamically by name
    model = getattr(models, arch)(pretrained=True)
    ##    model = utils.convert_batchnorm_modules(model)   # ===== Monitoring =====

    # Freeze existing model parameters for training
    for param in model.parameters():
        param.requires_grad = False

    # Get last child module of imported model
    last_child = list(model.children())[-1]

    if type(last_child) == torch.nn.modules.linear.Linear:
        input_features = last_child.in_features
    elif type(last_child) == torch.nn.modules.container.Sequential:
        input_features = last_child[0].in_features

    # Add some new layers to train
    classifier = nn.Sequential(
        OrderedDict([  ### vgg16 : input_features = 25088
            ('fc1', nn.Linear(input_features, hidden_units)),
            ('relu', nn.ReLU()),
            ###('dropout', nn.Dropout(p=0.5)),
            ('fc2', nn.Linear(hidden_units, 102)),
            ###('relu2', nn.ReLU()),          ## Traces of
            ###('fc3', nn.Linear(256, 102)),  ##  experiments.
            ('output', nn.LogSoftmax(dim=1))
        ]))
    model.classifier = classifier
    model.class_idx_mapping = class_idx_mapping
    model = model.to(args.device)
    ##    _mem_monitor("1. HYBRID_MODEL : model loaded ", args.device)    # ===== Monitoring =====

    #optimizer = optim.Adam(model.classifier.parameters(), lr=args.learning_rate)
    optimizer = optim.SGD(model.classifier.parameters(), lr=args.learning_rate)

    if not args.disable_dp:
        privacy_engine = PrivacyEngine(
            classifier,  ### = model, idem with classifier
            batch_size=args.batch_size,
            sample_size=args.sample_size,
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=args.noise,
            max_grad_norm=args.clip,
        )
        privacy_engine.attach(optimizer)


##    _mem_monitor("HYBRID_MODEL after DP tranfo. ", args.device)   # ===== Monitoring =====

    return model, optimizer
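
A hedged usage sketch for `hybrid_model` (not from the original source); `train_loader`, `class_idx_mapping`, and the `args.delta` field are assumptions, while the other `args` attributes mirror what the function reads.

# Sketch only: one epoch with the returned DP-enabled optimizer.
model, optimizer = hybrid_model(arch="vgg16", hidden_units=4096,
                                class_idx_mapping=class_idx_mapping, args=args)
criterion = nn.NLLLoss()  # pairs with the LogSoftmax output layer
for images, labels in train_loader:
    images, labels = images.to(args.device), labels.to(args.device)
    optimizer.zero_grad()
    loss = criterion(model(images), labels)
    loss.backward()
    optimizer.step()

if not args.disable_dp:
    # args.delta is an assumed attribute; get_privacy_spent is used this way
    # in the other examples on this page.
    epsilon, best_alpha = optimizer.privacy_engine.get_privacy_spent(args.delta)
    print(f"epsilon = {epsilon:.2f} at delta = {args.delta} (alpha = {best_alpha})")
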
Example 4

    def dp_update_weights(self, model, global_round, args):
        # Set mode to train model
        model.train()
        epoch_loss = []

        # Set optimizer for the local updates
        if self.args.optimizer == 'sgd':

            optimizer = torch.optim.SGD(model.parameters(), lr=self.args.lr,
                                        momentum=0.5)
            privacy_engine = PrivacyEngine(
                model,
                self.trainloader,
                alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
                noise_multiplier=0.65,
                max_grad_norm=1.0,
            )

            privacy_engine.attach(optimizer)

        elif self.args.optimizer == 'adam':
            optimizer = torch.optim.Adam(model.parameters(), lr=self.args.lr,
                                         weight_decay=1e-4)

        for iter in range(self.args.local_ep):
            batch_loss = []
            for batch_idx, (images, labels) in enumerate(self.trainloader):
                images, labels = images.to(self.device), labels.to(self.device)

                model.zero_grad()
                log_probs = model(images)
                loss = self.criterion(log_probs, labels)
                loss.backward()
                optimizer.step()

                # if self.args.verbose and (batch_idx % 10 == 0):
                #     print('| Global Round : {} | Local Epoch : {} | [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                #         global_round, iter, batch_idx * len(images),
                #         len(self.trainloader.dataset),
                #                             100. * batch_idx / len(self.trainloader), loss.item()))
                self.logger.add_scalar('loss', loss.item())
                batch_loss.append(loss.item())
            epoch_loss.append(sum(batch_loss) / len(batch_loss))

            # Only the 'sgd' branch above attaches a privacy engine to the optimizer
            if hasattr(optimizer, "privacy_engine"):
                epsilon, best_alpha = optimizer.privacy_engine.get_privacy_spent(
                    args.delta
                )
                print(
                    f"(Ɛ = {epsilon}, 𝛿 = {args.delta}) for α = {best_alpha}"
                )
            # if epsilon > args.epsilon:
            #     break

        return model.state_dict(), sum(epoch_loss) / len(epoch_loss)
Example 5

 def setUp_privacy_engine(self, batch_size):
     self.privacy_engine = PrivacyEngine(
         self.model,
         batch_size=batch_size,
         sample_size=self.DATA_SIZE,
         alphas=self.ALPHAS,
         noise_multiplier=0,
         max_grad_norm=999,
     )
     self.privacy_engine.attach(self.optimizer)
Example 6

    def train(self):
        """
        train/update the curr model of the agent
        """
        optimizer = optim.Adadelta(self.model.parameters(), lr=self.lr)
        scheduler = StepLR(optimizer, step_size=1, gamma=self.gamma)
        loss_func = nn.CrossEntropyLoss()

        if self.dp:
            self.model.zero_grad()
            optimizer.zero_grad()
            clear_backprops(self.model)

            privacy_engine = PrivacyEngine(
                self.model,
                batch_size=self.bs,
                sample_size=self.num_train_samples,
                alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
                noise_multiplier=self.sigma,
                max_grad_norm=self.C)
            privacy_engine.attach(optimizer)

        if self.device == 'cuda':
            self.model.to('cuda')
        self.model.train()
        for _ in range(self.epochs):
            num_batches = len(self.train_loader)
            start, end = 0, num_batches
            if self.fed_avg:
                start, end = self.random_idx, self.random_idx + 1
                self.random_idx += 1
                if self.random_idx >= num_batches:
                    self.random_idx = 0

            with torch.set_grad_enabled(True):
                for batch_idx, (data, target) in enumerate(self.train_loader):
                    if start <= batch_idx < end:
                        if self.device == 'cuda':
                            data, target = data.to('cuda'), target.to('cuda')
                        optimizer.zero_grad()
                        output = self.model(data)
                        loss = loss_func(output, target)
                        loss.backward()
                        optimizer.step()
                        self.logs['train_loss'].append(copy.deepcopy(loss.item()))

            scheduler.step()
            self.lr = get_lr(optimizer)
            if self.fl_train is False:
                curr_acc = eval(self.model, self.test_loader, self.device)
                self.logs['val_acc'].append(copy.deepcopy(curr_acc))
Example 7

 def test_model_validator(self):
     """
     Test that the privacy engine throws on attach
     if there are unsupported modules
     """
     privacy_engine = PrivacyEngine(
         models.resnet18(),
         batch_size=self.BATCH_SIZE,
         sample_size=self.DATA_SIZE,
         alphas=self.ALPHAS,
         noise_multiplier=1.3,
         max_grad_norm=1,
     )
     with self.assertRaises(IncompatibleModuleException):
         privacy_engine.attach(self.private_optimizer)
Example 8

    def train(self):
        """
        train/update the curr model of the agent
        """
        #optimizer = optim.Adadelta(self.model.parameters(), lr=self.lr)
        optimizer = optim.Adam(self.model.parameters(), lr=1e-3)
        #scheduler = StepLR(optimizer, step_size=1, gamma=self.gamma)
        if self.dp:
            self.model.zero_grad()
            optimizer.zero_grad()
            clear_backprops(self.model)

            privacy_engine = PrivacyEngine(
                self.model,
                batch_size=self.bs,
                sample_size=self.num_train_samples,
                alphas=[1 + x / 10.0
                        for x in range(1, 100)] + list(range(12, 64)),
                noise_multiplier=self.sigma,
                max_grad_norm=self.C)
            privacy_engine.attach(optimizer)

        if self.device == 'cuda':
            self.model.to('cuda')
        self.model.train()
        for _ in range(self.epochs):
            num_batches = len(self.train_loader)
            default_list = list(range(num_batches))
            if self.fed_avg:
                default_list = np.random.choice(default_list, 1, replace=False)
            for batch_idx, (data, target) in enumerate(self.train_loader):
                if batch_idx in default_list:
                    if self.device == 'cuda':
                        data, target = data.to('cuda'), target.to('cuda')
                    optimizer.zero_grad()
                    output = self.model(data)
                    loss = F.nll_loss(output, target)
                    loss.backward()
                    optimizer.step()
                    self.logs['train_loss'].append(copy.deepcopy(loss.item()))

            #scheduler.step()
            self.lr = get_lr(optimizer)
            if self.fl_train is False:
                curr_acc = eval(self.model, self.test_loader, self.device)
                self.logs['val_acc'].append(copy.deepcopy(curr_acc))
Example 9

    def test_privacy_engine_class_example(self):
        # IMPORTANT: When changing this code you also need to update
        # the docstring for torchdp.privacy_engine.PrivacyEngine
        batch_size = 8
        sample_size = 64

        model = torch.nn.Linear(16, 32)  # An example model
        optimizer = torch.optim.SGD(model.parameters(), lr=0.05)
        privacy_engine = PrivacyEngine(
            model,
            batch_size,
            sample_size,
            alphas=range(2, 32),
            noise_multiplier=1.3,
            max_grad_norm=1.0,
        )
        privacy_engine.attach(
            optimizer)  # That's it! Now it's business as usual.
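
A short sketch of the "business as usual" step after attaching the engine; the synthetic data below is an assumption, not part of the original docstring example.

# Sketch: one ordinary training step once privacy_engine.attach(optimizer) has run.
criterion = torch.nn.MSELoss()
x = torch.randn(batch_size, 16)   # synthetic inputs matching nn.Linear(16, 32)
y = torch.randn(batch_size, 32)
optimizer.zero_grad()
loss = criterion(model(x), y)
loss.backward()   # the engine's hooks compute per-sample gradients
optimizer.step()  # gradients are clipped and noised before the parameter update
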
Example 10

def train_model(model,
                dataloader,
                lr,
                epoch_num,
                dldp_setting=(0.0, 5.0),
                verbose=True,
                testloader=None):
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    if dldp_setting[0] != 0:
        from torchdp import PrivacyEngine
        privacy_engine = PrivacyEngine(model,
                                       dataloader,
                                       # RDP orders: alphas must be a list of orders, not a scalar
                                       alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
                                       noise_multiplier=dldp_setting[0],
                                       max_grad_norm=dldp_setting[1])
        privacy_engine.attach(optimizer)

    for epoch in range(epoch_num):
        cum_loss = 0.0
        cum_acc = 0.0
        cum_pred = []
        cum_lab = []
        tot = 0.0
        for i, (x_in, y_in) in enumerate(dataloader):
            B = x_in.size()[0]
            pred = model(x_in).squeeze(1)
            loss = model.loss(pred, y_in)
            model.zero_grad()
            loss.backward()
            optimizer.step()

            cum_loss += loss.item() * B
            cum_acc += ((pred > 0).cpu().long().eq(y_in)).sum().item()
            cum_pred = cum_pred + list(pred.detach().cpu().numpy())
            cum_lab = cum_lab + list(y_in.numpy())
            tot = tot + B

        if verbose:
            print("Epoch %d, loss = %.4f, acc = %.4f, auc = %.4f" %
                  (epoch, cum_loss / tot, cum_acc / tot,
                   roc_auc_score(cum_lab, cum_pred)))
            if testloader is not None:
                print(eval_binary_model(model, testloader))
                model.train()
Example 11

 def setUpOptimizer(self,
                    model: nn.Module,
                    data_loader: DataLoader,
                    privacy_engine: bool = False):
     # sample parameter values
     optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)
     optimizer.zero_grad()
     if privacy_engine:
         pe = PrivacyEngine(
             model,
             batch_size=data_loader.batch_size,
             sample_size=len(data_loader.dataset),
             alphas=[1 + x / 10.0
                     for x in range(1, 100)] + list(range(12, 64)),
             noise_multiplier=1.3,
             max_grad_norm=1,
         )
         pe.attach(optimizer)
     return optimizer
Example 12

    def test_privacy_engine_to_example(self):
        # IMPORTANT: When changing this code you also need to update
        # the docstring for torchdp.privacy_engine.PrivacyEngine.to()
        batch_size = 8
        sample_size = 64

        model = torch.nn.Linear(16,
                                32)  # An example model. Default device is CPU
        privacy_engine = PrivacyEngine(
            model,
            batch_size,
            sample_size,
            alphas=range(5, 64),
            noise_multiplier=0.8,
            max_grad_norm=0.5,
        )
        device = "cpu"
        model.to(
            device
        )  # If we move the model to GPU, we should call the to() method of the privacy engine (next line)
        privacy_engine.to(device)
Example 13

 def setUpOptimizer(
     self, model: nn.Module, data_loader: DataLoader, privacy_engine: bool = False
 ):
     # sample parameter values
     optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)
     optimizer.zero_grad()
     if privacy_engine:
         pe = PrivacyEngine(
             model,
             # pyre-fixme[6]: Expected `int` for 2nd param but got `Optional[int]`.
             batch_size=data_loader.batch_size,
             # pyre-fixme[6]: Expected `Sized` for 1st param but got
             #  `Dataset[typing.Any]`.
             sample_size=len(data_loader.dataset),
             # pyre-fixme[6]: `+` is not supported for operand types
             #  `List[float]` and `List[int]`.
             alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
             noise_multiplier=1.3,
             max_grad_norm=1,
         )
         pe.attach(optimizer)
     return optimizer
Example 14

    def setUp_init_model(self,
                         private=False,
                         state_dict=None,
                         model=None,
                         **privacy_engine_kwargs):
        model = model or SampleConvNet()
        optimizer = torch.optim.SGD(model.parameters(), lr=self.LR, momentum=0)
        if state_dict:
            model.load_state_dict(state_dict)

        if private:
            if len(privacy_engine_kwargs) == 0:
                privacy_engine_kwargs = self.privacy_default_params
            privacy_engine = PrivacyEngine(
                model,
                batch_size=self.BATCH_SIZE,
                sample_size=self.DATA_SIZE,
                alphas=self.ALPHAS,
                **privacy_engine_kwargs,
            )
            privacy_engine.attach(optimizer)

        return model, optimizer
Example 15

    def test_privacy_engine_virtual_step_example(self):
        # IMPORTANT: When changing this code you also need to update
        # the docstring for torchdp.privacy_engine.PrivacyEngine.virtual_step()
        model = nn.Linear(16, 2)
        dataloader = []
        batch_size = 64
        sample_size = 256

        for _ in range(64):
            data = torch.randn(4, 16)
            labels = torch.randint(0, 2, (4, ))
            dataloader.append((data, labels))

        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.SGD(model.parameters(), lr=0.05)

        privacy_engine = PrivacyEngine(
            model,
            batch_size,
            sample_size,
            alphas=range(5, 64),
            noise_multiplier=0.8,
            max_grad_norm=0.5,
        )
        privacy_engine.attach(optimizer)

        for i, (X, y) in enumerate(dataloader):
            logits = model(X)
            loss = criterion(logits, y)
            loss.backward()
            if i % 16 == 15:
                optimizer.step()  # this will call privacy engine's step()
                optimizer.zero_grad()
            else:
                optimizer.virtual_step(
                )  # this will call privacy engine's virtual_step()
Example 16

class GradientAccumulation_test(unittest.TestCase):
    def setUp(self):
        self.DATA_SIZE = 64
        self.BATCH_SIZE = 16
        self.LR = 0  # we want to call optimizer.step() without modifying the model
        self.ALPHAS = [1 + x / 10.0 for x in range(1, 100, 10)]
        self.criterion = nn.CrossEntropyLoss()

        self.setUp_data()
        self.setUp_model_and_optimizer()

    def setUp_data(self):
        self.ds = FakeData(
            size=self.DATA_SIZE,
            image_size=(1, 35, 35),
            num_classes=10,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.1307, ), (0.3081, ))
            ]),
        )
        self.dl = DataLoader(self.ds, batch_size=self.BATCH_SIZE)

    def setUp_model_and_optimizer(self):
        self.model = SampleConvNet()
        self.optimizer = torch.optim.SGD(self.model.parameters(),
                                         lr=self.LR,
                                         momentum=0)

        self.optimizer.zero_grad()

        # accumulate .grad over the entire dataset
        for x, y in self.dl:
            logits = self.model(x)
            loss = self.criterion(logits, y)
            loss.backward()

        self.effective_batch_grad = torch.cat([
            p.grad.reshape(-1)
            for p in self.model.parameters() if p.requires_grad
        ]) * (self.BATCH_SIZE / self.DATA_SIZE)

        self.optimizer.zero_grad()

    def setUp_privacy_engine(self, batch_size):
        self.privacy_engine = PrivacyEngine(
            self.model,
            batch_size=batch_size,
            sample_size=self.DATA_SIZE,
            alphas=self.ALPHAS,
            noise_multiplier=0,
            max_grad_norm=999,
        )
        self.privacy_engine.attach(self.optimizer)

    def calc_per_sample_grads(self, data_iter, num_steps=1):
        for x, y in data_iter:
            num_steps -= 1
            logits = self.model(x)
            loss = self.criterion(logits, y)
            loss.backward()
            if num_steps == 0:
                break

    def test_grad_sample_accumulation(self):
        """
        Calling loss.backward() multiple times should sum up the gradients in .grad
        and accumulate all the individual gradients in .grad_sample
        """
        self.setUp_privacy_engine(self.DATA_SIZE)
        data_iter = iter(self.dl)  # 4 batches of size 16 each
        self.calc_per_sample_grads(data_iter, num_steps=4)
        # should accumulate grads in .grad and .grad_sample

        # the accumulated per-sample gradients
        per_sample_grads = torch.cat(
            [
                p.grad_sample.reshape(self.DATA_SIZE, -1)
                for p in self.model.parameters() if p.requires_grad
            ],
            dim=-1,
        )
        # average up all the per-sample gradients
        accumulated_grad = torch.mean(per_sample_grads, dim=0)

        # the full data gradient accumulated in .grad
        grad = torch.cat([
            p.grad.reshape(-1)
            for p in self.model.parameters() if p.requires_grad
        ]) * (self.BATCH_SIZE / self.DATA_SIZE)

        self.optimizer.step()

        # the accumulated gradients in .grad without any hooks
        orig_grad = self.effective_batch_grad

        self.assertTrue(
            torch.allclose(accumulated_grad, orig_grad, atol=10e-5,
                           rtol=10e-3))
        self.assertTrue(torch.allclose(grad, orig_grad, atol=10e-5,
                                       rtol=10e-3))

    def test_clipper_accumulation(self):
        """
        Calling optimizer.virtual_step() should accumulate clipped gradients to form
        one large batch.
        """
        self.setUp_privacy_engine(self.DATA_SIZE)
        data = iter(self.dl)  # 4 batches of size 16 each

        for _ in range(3):  # take 3 virtual steps
            self.calc_per_sample_grads(data, num_steps=1)
            self.optimizer.virtual_step()

        # accumulate on the last step
        self.calc_per_sample_grads(data, num_steps=1)
        self.optimizer.step()

        # .grad should contain the average gradient over the entire dataset
        accumulated_grad = torch.cat([
            p.grad.reshape(-1) for p in self.model.parameters()
            if p.requires_grad
        ])

        # the accumulated gradients in .grad without any hooks
        orig_grad = self.effective_batch_grad

        self.assertTrue(
            torch.allclose(accumulated_grad, orig_grad, atol=10e-5,
                           rtol=10e-3),
            f"Values are {accumulated_grad} vs {orig_grad}."
            f"MAD is {(orig_grad - accumulated_grad).abs().mean()}")

    def test_mixed_accumulation(self):
        """
        Calling loss.backward() multiple times aggregates all per-sample gradients in
        .grad_sample. Then, calling optimizer.virtual_step() should clip all gradients
        and aggregate them into one large batch.
        """
        self.setUp_privacy_engine(self.DATA_SIZE)
        data = iter(self.dl)  # 4 batches of size 16 each

        # accumulate per-sample grads for two mini batches
        self.calc_per_sample_grads(data, num_steps=2)
        # take a virtual step
        self.optimizer.virtual_step()
        # accumulate another two mini batches
        self.calc_per_sample_grads(data, num_steps=2)
        # take a step
        self.optimizer.step()

        # .grad should contain the average gradient over the entire dataset
        accumulated_grad = torch.cat([
            p.grad.reshape(-1) for p in self.model.parameters()
            if p.requires_grad
        ])

        # the accumulated gradients in .grad without any hooks
        orig_grad = self.effective_batch_grad

        self.assertTrue(
            torch.allclose(accumulated_grad, orig_grad, atol=10e-5,
                           rtol=10e-3))

    def test_grad_sample_erased(self):
        """
        Calling optimizer.step() should erase any accumulated per-sample gradients.
        """
        self.setUp_privacy_engine(2 * self.BATCH_SIZE)
        data = iter(self.dl)  # 4 batches of size 16 each

        for _ in range(2):
            # accumulate per-sample gradients for two mini-batches to form an
            # effective batch of size `2*BATCH_SIZE`. Once an effective batch
            # has been accumulated, we call `optimizer.step()` to clip and
            # average the per-sample gradients. This should erase the
            # `grad_sample` fields for each parameter
            self.calc_per_sample_grads(data, num_steps=2)
            self.optimizer.step()

            for param_name, param in self.model.named_parameters():
                if param.requires_grad:
                    self.assertFalse(
                        hasattr(param, "grad_sample"),
                        f"Per-sample gradients haven't been erased "
                        f"for {param_name}",
                    )

    def test_summed_grad_erased(self):
        """
        Calling optimizer.step() should erase any accumulated clipped gradients.
        """

        self.setUp_privacy_engine(2 * self.BATCH_SIZE)
        data = iter(self.dl)  # 4 batches of size 16 each

        for idx in range(4):
            self.calc_per_sample_grads(data, num_steps=1)

            if idx % 2 == 0:
                # perform a virtual step for each mini-batch
                # this will accumulate clipped gradients in each parameter's
                # `summed_grad` field.
                self.optimizer.virtual_step()
                for param_name, param in self.model.named_parameters():
                    if param.requires_grad:
                        self.assertTrue(
                            hasattr(param, "summed_grad"),
                            f"Clipped gradients aren't accumulated "
                            f"for {param_name}",
                        )
            else:
                # accumulate gradients for two mini-batches to form an
                # effective batch of size `2*BATCH_SIZE`. Once an effective batch
                # has been accumulated, we call `optimizer.step()` to compute the
                # average gradient for the entire batch. This should erase the
                # `summed_grad` fields for each parameter.
                # take a step. The clipper will compute the mean gradient
                # for the entire effective batch and populate each parameter's
                # `.grad` field.
                self.optimizer.step()

                for param_name, param in self.model.named_parameters():
                    if param.requires_grad:
                        self.assertFalse(
                            hasattr(param, "summed_grad"),
                            f"Accumulated clipped gradients haven't been erased "
                            f"for {param_name}",
                        )

    def test_throws_wrong_batch_size(self):
        """
        If we accumulate the wrong number of gradients and feed this batch to
        the privacy engine, we expect a failure.
        """
        self.setUp_privacy_engine(2 * self.BATCH_SIZE)
        data = iter(self.dl)  # 4 batches of size 16 each

        # consuming a batch that is smaller than expected should work
        self.calc_per_sample_grads(data, num_steps=1)
        with self.assertWarns(Warning):
            self.optimizer.step()
            self.optimizer.zero_grad()

        # consuming a larger batch than expected should fail
        for _ in range(2):
            self.calc_per_sample_grads(data, num_steps=1)
            self.optimizer.virtual_step()
        with self.assertRaises(ValueError):
            self.calc_per_sample_grads(data, num_steps=1)
            self.optimizer.step()
Example 17

criterion = nn.BCELoss()

FIXED_NOISE = torch.randn(opt.batch_size, nz, 1, 1, device=device)
REAL_LABEL = 1
FAKE_LABEL = 0

# setup optimizer
optimizerD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))

privacy_engine = PrivacyEngine(
    netD,
    batch_size=opt.batch_size,
    # pyre-fixme[6]: Expected `Sized` for 1st param but got `Dataset[typing.Any]`.
    sample_size=len(dataloader.dataset),
    # pyre-fixme[6]: `+` is not supported for operand types `List[float]` and
    #  `List[int]`.
    alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
    noise_multiplier=opt.sigma,
    max_grad_norm=opt.max_per_sample_grad_norm,
)
if not opt.disable_dp:
    privacy_engine.attach(optimizerD)
optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))

for epoch in range(opt.epochs):
    for i, data in enumerate(dataloader, 0):
        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################
        netD.zero_grad()
Example 18

if opt.netD != '':
    netD.load_state_dict(torch.load(opt.netD))

criterion = nn.BCELoss()

FIXED_NOISE = torch.randn(opt.batch_size, nz, 1, 1, device=device)
REAL_LABEL = 1
FAKE_LABEL = 0

# setup optimizer
optimizerD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))

privacy_engine = PrivacyEngine(
    netD,
    batch_size=opt.batch_size,
    sample_size=len(dataloader.dataset),
    alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
    noise_multiplier=opt.sigma,
    max_grad_norm=opt.max_per_sample_grad_norm
)
if not opt.disable_dp:
    privacy_engine.attach(optimizerD)
optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))

for epoch in range(opt.epochs):
    for i, data in enumerate(dataloader, 0):
        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################
        netD.zero_grad()

        real_data = data[0].to(device)
Example 19

def main():
    # Training settings
    parser = argparse.ArgumentParser(description="PyTorch Adult Example")
    parser.add_argument(
        "-b",
        "--batch-size",
        type=int,
        default=256,
        metavar="B",
        help="input batch size for training (default: 256)",
    )
    parser.add_argument(
        "--test-batch-size",
        type=int,
        default=1024,
        metavar="TB",
        help="input batch size for testing (default: 1024)",
    )
    parser.add_argument(
        "-n",
        "--epochs",
        type=int,
        default=10,
        metavar="N",
        help="number of epochs to train (default: 10)",
    )
    parser.add_argument(
        "-r",
        "--n-runs",
        type=int,
        default=1,
        metavar="R",
        help="number of runs to average on (default: 1)",
    )
    parser.add_argument(
        "--lr",
        type=float,
        default=0.15,
        metavar="LR",
        help="learning rate (default: 0.15)",
    )
    parser.add_argument(
        "--sigma",
        type=float,
        default=0.55,
        metavar="S",
        help="Noise multiplier (default 0.55)",
    )
    parser.add_argument(
        "-c",
        "--max-per-sample-grad_norm",
        type=float,
        default=1.0,
        metavar="C",
        help="Clip per-sample gradients to this norm (default 1.0)",
    )
    parser.add_argument(
        "--delta",
        type=float,
        default=1e-5,
        metavar="D",
        help="Target delta (default: 1e-5)",
    )
    parser.add_argument(
        "--device",
        type=str,
        default="cuda",
        help="GPU ID for this process (default: 'cuda')",
    )
    parser.add_argument(
        "--save-model",
        action="store_true",
        default=False,
        help="Save the trained model (default: false)",
    )
    parser.add_argument(
        "--disable-dp",
        action="store_true",
        default=False,
        help="Disable privacy training and just train with vanilla SGD",
    )

    args = parser.parse_args()
    device = torch.device(args.device)

    kwargs = {"num_workers": 1, "pin_memory": True}
    # Load ADULT a2a (LIBSVM format, exported to CSV) and split it into train/test sets.
    # https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary.html
    x = pd.read_csv('adult.csv')
    trainData, testData = train_test_split(x, test_size=0.1, random_state=218)
    # have to reset index, see https://discuss.pytorch.org/t/keyerror-when-enumerating-over-dataloader/54210/13
    trainData = trainData.reset_index()
    testData = testData.reset_index()

    train_data = trainData.iloc[:, 1:-1].astype('float32')
    test_data = testData.iloc[:, 1:-1].astype('float32')
    train_labels = (trainData.iloc[:, -1] == 1).astype('int32')
    test_labels = (testData.iloc[:, -1] == 1).astype('int32')

    train_loader = torch.utils.data.DataLoader(AdultDataset(
        train_data, train_labels),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               **kwargs)

    test_loader = torch.utils.data.DataLoader(AdultDataset(
        test_data, test_labels),
                                              batch_size=args.test_batch_size,
                                              shuffle=True,
                                              **kwargs)

    run_results = []
    for _ in range(args.n_runs):
        model = SampleConvNet().to(device)

        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0)
        if not args.disable_dp:
            privacy_engine = PrivacyEngine(
                model,
                batch_size=args.batch_size,
                sample_size=len(train_loader.dataset),
                alphas=[1 + x / 10.0 for x in range(1, 100)] +
                list(np.arange(12, 60, 0.1)) + list(np.arange(61, 100, 1)),
                noise_multiplier=args.sigma,
                max_grad_norm=args.max_per_sample_grad_norm,
            )
            privacy_engine.attach(optimizer)
        for epoch in range(1, args.epochs + 1):
            train(args, model, device, train_loader, optimizer, epoch)
            test(args, model, device, test_loader)

        run_results.append(test(args, model, device, test_loader))

    if len(run_results) > 1:
        print("Accuracy averaged over {} runs: {:.2f}% ± {:.2f}%".format(
            len(run_results),
            np.mean(run_results) * 100,
            np.std(run_results) * 100))
Example 20

    def _fit_model(
        self,
        ind,
        samples,
        labels,
        weights,
        batch_size,
        num_epochs,
        learning_rate,
        max_grad_norm,
        noise_multiplier,
        delta,
    ):
        dataset = TensorDataset(
            torch.from_numpy(samples).float(), torch.from_numpy(labels))

        kwargs = {"num_workers": 1, "pin_memory": True} if HAS_CUDA else {}
        if type(weights).__name__ == "ndarray":
            sampler = WeightedRandomSampler(weights=weights,
                                            num_samples=len(samples))
            loader = DataLoader(dataset=dataset,
                                batch_size=batch_size,
                                sampler=sampler,
                                **kwargs)
        else:
            loader = DataLoader(dataset=dataset,
                                batch_size=batch_size,
                                shuffle=True,
                                **kwargs)

        model = self.models[ind]

        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
        privacy_engine = PrivacyEngine(
            model,
            batch_size,
            len(dataset),
            alphas=[1, 10, 100],
            noise_multiplier=noise_multiplier,
            max_grad_norm=max_grad_norm,
            target_delta=delta,
            loss_reduction='sum',
        )

        privacy_engine.attach(optimizer)
        for epoch in range(1, num_epochs + 1):
            model.train()
            train_loss = 0
            for batch_ind, (data, cond) in enumerate(loader):
                data, cond = data.to(DEVICE), self._one_hot(cond).to(DEVICE)
                optimizer.zero_grad()
                recon_batch, mu, log_var = model(data, cond)
                loss = self._loss_function(recon_batch, data, mu, log_var)
                loss.backward()
                train_loss += loss.item()
                optimizer.step()
                if batch_ind % 100 == 0:
                    print("epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
                        epoch,
                        batch_ind * len(data),
                        len(loader.dataset),
                        100.0 * batch_ind / len(loader),
                        loss.item() / len(data),
                    ))
            print("====> epoch: {} avg loss: {:.4f}".format(
                epoch, train_loss / len(loader.dataset)))
Example 21

def main():
    # Training settings
    parser = argparse.ArgumentParser(description="PyTorch MNIST Example")
    parser.add_argument(
        "-b",
        "--batch-size",
        type=int,
        default=64,
        metavar="B",
        help="input batch size for training (default: 64)",
    )
    parser.add_argument(
        "--test-batch-size",
        type=int,
        default=1024,
        metavar="TB",
        help="input batch size for testing (default: 1024)",
    )
    parser.add_argument(
        "-n",
        "--epochs",
        type=int,
        default=14,
        metavar="N",
        help="number of epochs to train (default: 14)",
    )
    parser.add_argument(
        "-r",
        "--n-runs",
        type=int,
        default=1,
        metavar="R",
        help="number of runs to average on (default: 1)",
    )
    parser.add_argument(
        "--lr",
        type=float,
        default=1.0,
        metavar="LR",
        help="learning rate (default: 1.0)",
    )
    parser.add_argument(
        "--sigma",
        type=float,
        default=1.0,
        metavar="S",
        help="Noise multiplier (default 1.0)",
    )
    parser.add_argument(
        "-c",
        "--max-per-sample-grad_norm",
        type=float,
        default=1.0,
        metavar="C",
        help="Clip per-sample gradients to this norm (default 1.0)",
    )
    parser.add_argument(
        "--delta",
        type=float,
        default=1e-5,
        metavar="D",
        help="Target delta (default: 1e-5)",
    )
    parser.add_argument(
        "--device",
        type=str,
        default="cuda",
        help="GPU ID for this process (default: 'cuda')",
    )
    parser.add_argument(
        "--save-model",
        action="store_true",
        default=False,
        help="For Saving the current Model",
    )
    parser.add_argument(
        "--disable-dp",
        action="store_true",
        default=False,
        help="Disable privacy training and just train with vanilla SGD",
    )
    parser.add_argument(
        "--data-root",
        type=str,
        default="../mnist",
        help="Where MNIST is/will be stored",
    )
    args = parser.parse_args()
    device = torch.device(args.device)

    kwargs = {"num_workers": 1, "pin_memory": True}
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(
            args.data_root,
            train=True,
            download=True,
            transform=transforms.Compose(
                [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
            ),
        ),
        batch_size=args.batch_size,
        shuffle=True,
        **kwargs,
    )
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(
            args.data_root,
            train=False,
            transform=transforms.Compose(
                [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
            ),
        ),
        batch_size=args.test_batch_size,
        shuffle=True,
        **kwargs,
    )
    run_results = []
    for _run in range(1, args.n_runs + 1):
        model = SampleConvNet().to(device)
        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0)
        if not args.disable_dp:
            privacy_engine = PrivacyEngine(
                model,
                train_loader,
                alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
                noise_multiplier=args.sigma,
                max_grad_norm=args.max_per_sample_grad_norm,
            )
            privacy_engine.attach(optimizer)
        for epoch in range(1, args.epochs + 1):
            train(args, model, device, train_loader, optimizer, epoch)
        run_results.append(test(args, model, device, test_loader))

    run_results = torch.Tensor(run_results)
    print(
        f"Accuracy: {torch.mean(run_results).item()} ± {torch.std(run_results).item()}"
    )
    repro_str = (
        f"{model.name()}_{args.lr}_{args.sigma}_"
        f"{args.max_per_sample_grad_norm}_{args.batch_size}_{args.epochs}"
    )
    torch.save(run_results, f"run_results_{repro_str}.pt")

    if args.save_model:
        torch.save(model.state_dict(), f"mnist_cnn_{repro_str}.pt")
Example 22

def train(architecture='softmax'):
	n = nn.Sequential(
		nn.Flatten(),
		nn.Linear(in_features=112 * 92, out_features=40),
	) if architecture == 'softmax' else nn.Sequential(
		nn.Conv2d(in_channels=1, out_channels=1, kernel_size=5, padding=2, stride=1),
		nn.Flatten(),
		nn.Linear(in_features=112 * 92, out_features=40),
	) if architecture == 'conv 1 channel' else nn.Sequential(
		nn.Conv2d(in_channels=1, out_channels=3, kernel_size=5, padding=2, stride=1),
		nn.Flatten(),
		nn.Linear(in_features=112 * 92 * 3, out_features=40),
	) if architecture == 'conv 3 channel' else nn.Sequential(
		nn.Flatten(),
		nn.Linear(in_features=112 * 92, out_features=1500),
		nn.ReLU(),
		nn.Linear(in_features=1500, out_features=40),
	)
	lr = 0.01
	optimizer = torch.optim.Adam(n.parameters(), lr=lr)

	train_features = torch.load(join(os.curdir, dirname(__file__), f'train_features{os.extsep}pt')).float()
	train_labels = torch.load(join(os.curdir, dirname(__file__), f'train_labels{os.extsep}pt')).long()
	test_features = torch.load(join(os.curdir, dirname(__file__), f'test_features{os.extsep}pt')).float()
	test_labels = torch.load(join(os.curdir, dirname(__file__), f'test_labels{os.extsep}pt')).long()

	if len(sys.argv) > 1:
		privacy_engine = PrivacyEngine(
			n,
			batch_size=train_labels.shape[0],
			sample_size=train_labels.shape[0],
			alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
			noise_multiplier=float(sys.argv[1]),
			max_grad_norm=1.5,
		)
		privacy_engine.attach(optimizer)

	train_losses = []
	test_losses = []
	train_accuracy = []
	test_accuracy = []
	print(f'Train Network {architecture} with learning rate {lr}' + (f' and sigma {float(sys.argv[1])}' if len(sys.argv) > 1 else ''))
	num_epochs = 101
	with tqdm(total=num_epochs, dynamic_ncols=True) as pbar:
		for i in range(num_epochs):
			pred_train_labels = n(train_features)
			loss = F.cross_entropy(pred_train_labels, train_labels)
			train_losses.append(loss.item())
			train_accuracy.append((pred_train_labels.max(axis=1).indices == train_labels).sum().item() / len(train_labels))

			optimizer.zero_grad()
			loss.backward()
			optimizer.step()

			if i % 5 == 0:
				n.eval()
				with torch.no_grad():
					pred_test_labels = n(test_features)
					loss = F.cross_entropy(pred_test_labels, test_labels)
					test_losses.append((i, loss.item()))
					test_accuracy.append((i, (pred_test_labels.max(axis=1).indices == test_labels).sum().item() / len(test_labels)))
				n.train()
			if len(sys.argv) > 1:
				delta = 1e-5
				epsilon, best_alpha = optimizer.privacy_engine.get_privacy_spent(delta)
				pbar.set_description(f'Loss = {np.mean(train_losses):.4f}, ε = {epsilon:.2f}')
			pbar.update(1)

	with torch.no_grad():
		n.eval()
		print(f'Train performance: {(n(train_features).max(axis=1).indices == train_labels).sum().item() / len(train_labels) * 100:.2f}%')
		print(f'Test performance: {(n(test_features).max(axis=1).indices == test_labels).sum().item() / len(test_labels) * 100:.2f}%')
		plt.plot(range(len(train_losses)), train_losses, label='Train loss')
		plt.plot([t[0] for t in test_losses], [t[1] for t in test_losses], label='Validation loss')
		plt.legend()
		plt.title('Loss of training and validation')
		plt.show()
		plt.plot(range(len(train_accuracy)), train_accuracy, label='Train accuracy')
		plt.plot([t[0] for t in test_accuracy], [t[1] for t in test_accuracy], label='Validation accuracy')
		plt.legend()
		plt.title('Accuracy of training and validation')
		plt.show()

	model_invert(1, 200, 0.01, n)
Example 23

def main():
    parser = argparse.ArgumentParser(description="PyTorch CIFAR10 DP Training")
    parser.add_argument(
        "-j",
        "--workers",
        default=2,
        type=int,
        metavar="N",
        help="number of data loading workers (default: 2)",
    )
    parser.add_argument(
        "--epochs",
        default=90,
        type=int,
        metavar="N",
        help="number of total epochs to run",
    )
    parser.add_argument(
        "--start-epoch",
        default=1,
        type=int,
        metavar="N",
        help="manual epoch number (useful on restarts)",
    )
    parser.add_argument(
        "-b",
        "--batch-size",
        default=256,
        type=int,
        metavar="N",
        help="mini-batch size (default: 256), this is the total "
        "batch size of all GPUs on the current node when "
        "using Data Parallel or Distributed Data Parallel",
    )
    parser.add_argument(
        "-na",
        "--n_accumulation_steps",
        default=1,
        type=int,
        metavar="N",
        help="number of mini-batches to accumulate into an effective batch",
    )
    parser.add_argument(
        "--lr",
        "--learning-rate",
        default=0.001,
        type=float,
        metavar="LR",
        help="initial learning rate",
        dest="lr",
    )
    parser.add_argument("--momentum",
                        default=0.9,
                        type=float,
                        metavar="M",
                        help="SGD momentum")
    parser.add_argument(
        "--wd",
        "--weight-decay",
        default=5e-4,
        type=float,
        metavar="W",
        help="SGD weight decay (default: 5e-4)",
        dest="weight_decay",
    )
    parser.add_argument(
        "-p",
        "--print-freq",
        default=10,
        type=int,
        metavar="N",
        help="print frequency (default: 10)",
    )
    parser.add_argument(
        "--resume",
        default="",
        type=str,
        metavar="PATH",
        help="path to latest checkpoint (default: none)",
    )
    parser.add_argument(
        "-e",
        "--evaluate",
        dest="evaluate",
        action="store_true",
        help="evaluate model on validation set",
    )
    parser.add_argument("--seed",
                        default=None,
                        type=int,
                        help="seed for initializing training. ")
    parser.add_argument(
        "--device",
        type=str,
        default="cuda",
        help="GPU ID for this process (default: 'cuda')",
    )
    parser.add_argument(
        "--sigma",
        type=float,
        default=1.0,
        metavar="S",
        help="Noise multiplier (default 1.0)",
    )
    parser.add_argument(
        "-c",
        "--max-per-sample-grad_norm",
        type=float,
        default=1.0,
        metavar="C",
        help="Clip per-sample gradients to this norm (default 1.0)",
    )
    parser.add_argument(
        "--disable-dp",
        action="store_true",
        default=False,
        help="Disable privacy training and just train with vanilla SGD",
    )
    parser.add_argument(
        "--delta",
        type=float,
        default=1e-5,
        metavar="D",
        help="Target delta (default: 1e-5)",
    )

    parser.add_argument(
        "--checkpoint-file",
        type=str,
        default="checkpoint",
        help="path to save check points",
    )
    parser.add_argument(
        "--data-root",
        type=str,
        default="../cifar10",
        help="Where CIFAR10 is/will be stored",
    )
    parser.add_argument("--log-dir",
                        type=str,
                        default="",
                        help="Where Tensorboard log will be stored")
    parser.add_argument(
        "--optim",
        type=str,
        default="Adam",
        help="Optimizer to use (Adam, RMSprop, SGD)",
    )

    args = parser.parse_args()

    # The following few lines enable stats gathering about the run
    # 1. where the stats should be logged
    stats.set_global_summary_writer(
        tensorboard.SummaryWriter(os.path.join("/tmp/stat", args.log_dir)))
    # 2. enable stats
    stats.add(
        # stats about gradient norms aggregated for all layers
        stats.Stat(stats.StatType.CLIPPING, "AllLayers", frequency=0.1),
        # stats about gradient norms per layer
        stats.Stat(stats.StatType.CLIPPING, "PerLayer", frequency=0.1),
        # stats about clipping
        stats.Stat(stats.StatType.CLIPPING, "ClippingStats", frequency=0.1),
        # stats on training accuracy
        stats.Stat(stats.StatType.TRAIN, "accuracy", frequency=0.01),
        # stats on validation accuracy
        stats.Stat(stats.StatType.TEST, "accuracy"),
    )

    # The following lines enable stat gathering for the clipping process
    # and set flat (not per-layer) clipping as the default for the Privacy Engine
    clipping = {"clip_per_layer": False, "enable_stat": True}
    augmentations = [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
    ]
    normalize = [
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ]
    train_transform = transforms.Compose(
        augmentations + normalize if args.disable_dp else normalize)

    test_transform = transforms.Compose(normalize)

    train_dataset = CIFAR10(root=args.data_root,
                            train=True,
                            download=True,
                            transform=train_transform)

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        drop_last=True,
    )

    test_dataset = CIFAR10(root=args.data_root,
                           train=False,
                           download=True,
                           transform=test_transform)
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
    )

    best_acc1 = 0
    device = torch.device(args.device)
    model = utils.convert_batchnorm_modules(models.resnet18(num_classes=10))
    model = model.to(device)

    if args.optim == "SGD":
        optimizer = optim.SGD(
            model.parameters(),
            lr=args.lr,
            momentum=args.momentum,
            weight_decay=args.weight_decay,
        )
    elif args.optim == "RMSprop":
        optimizer = optim.RMSprop(model.parameters(), lr=args.lr)
    elif args.optim == "Adam":
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
    else:
        raise NotImplementedError(
            "Optimizer not recognized. Please check spelling")

    if not args.disable_dp:
        privacy_engine = PrivacyEngine(
            model,
            batch_size=args.batch_size * args.n_accumulation_steps,
            sample_size=len(train_dataset),
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=args.sigma,
            max_grad_norm=args.max_per_sample_grad_norm,
            **clipping,
        )
        privacy_engine.attach(optimizer)

    for epoch in range(args.start_epoch, args.epochs + 1):
        train(args, model, train_loader, optimizer, epoch, device)
        top1_acc = test(args, model, test_loader, device)

        # remember best acc@1 and save checkpoint
        is_best = top1_acc > best_acc1
        best_acc1 = max(top1_acc, best_acc1)

        save_checkpoint(
            {
                "epoch": epoch + 1,
                "arch": "ResNet18",
                "state_dict": model.state_dict(),
                "best_acc1": best_acc1,
                "optimizer": optimizer.state_dict(),
            },
            is_best,
            filename=args.checkpoint_file + ".tar",
        )
Example 24

def main():
    parser = argparse.ArgumentParser(description="PyTorch IMDB Example")
    parser.add_argument(
        "-b",
        "--batch-size",
        type=int,
        default=64,
        metavar="B",
        help="input batch size for training (default: 64)",
    )
    parser.add_argument(
        "-n",
        "--epochs",
        type=int,
        default=10,
        metavar="N",
        help="number of epochs to train (default: 10)",
    )
    parser.add_argument(
        "--lr",
        type=float,
        default=0.02,
        metavar="LR",
        help="learning rate (default: .02)",
    )
    parser.add_argument(
        "--sigma",
        type=float,
        default=0.56,
        metavar="S",
        help="Noise multiplier (default 0.56)",
    )
    parser.add_argument(
        "-c",
        "--max-per-sample-grad_norm",
        type=float,
        default=1.0,
        metavar="C",
        help="Clip per-sample gradients to this norm (default 1.0)",
    )
    parser.add_argument(
        "--delta",
        type=float,
        default=1e-5,
        metavar="D",
        help="Target delta (default: 1e-5)",
    )
    parser.add_argument(
        "--vocab-size",
        type=int,
        default=10_000,
        metavar="MV",
        help="Max vocab size (default: 10000)",
    )
    parser.add_argument(
        "--sequence-length",
        type=int,
        default=256,
        metavar="SL",
        help="Longer sequences will be cut to this length, shorter sequences will be padded to this length (default: 256)",
    )
    parser.add_argument(
        "--device",
        type=str,
        default="cuda",
        help="GPU ID for this process (default: 'cuda')",
    )
    parser.add_argument(
        "--save-model",
        action="store_true",
        default=False,
        help="Save the trained model (default: false)",
    )
    parser.add_argument(
        "--disable-dp",
        action="store_true",
        default=False,
        help="Disable privacy training and just train with vanilla optimizer",
    )
    parser.add_argument(
        "--data-root",
        type=str,
        default="../imdb",
        help="Where IMDB is/will be stored",
    )

    args = parser.parse_args()
    device = torch.device(args.device)

    text_field = torchtext.data.Field(
        tokenize=get_tokenizer("basic_english"),
        init_token="<sos>",
        eos_token="<eos>",
        fix_length=args.sequence_length,
        lower=True,
    )

    label_field = torchtext.data.LabelField(dtype=torch.long)

    train_data, test_data = torchtext.datasets.imdb.IMDB.splits(
        text_field, label_field, root=args.data_root
    )

    text_field.build_vocab(train_data, max_size=args.vocab_size)
    label_field.build_vocab(train_data)

    (train_iterator, test_iterator) = torchtext.data.BucketIterator.splits(
        (train_data, test_data), batch_size=args.batch_size, device=device
    )

    model = SampleNet(vocab_size=args.vocab_size).to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    if not args.disable_dp:
        privacy_engine = PrivacyEngine(
            model,
            batch_size=args.batch_size,
            sample_size=len(train_data),
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=args.sigma,
            max_grad_norm=args.max_per_sample_grad_norm,
        )
        privacy_engine.attach(optimizer)

    for epoch in range(1, args.epochs + 1):
        train(args, model, train_iterator, optimizer, epoch)
        evaluate(args, model, test_iterator)
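If the `train` helper used above does not already report the privacy budget, here is a minimal sketch (not part of the original example) of how it could be queried after each epoch through the attached engine, reusing `args.delta` from the argument parser:

if not args.disable_dp:
    epsilon, best_alpha = optimizer.privacy_engine.get_privacy_spent(args.delta)
    print(f"(ε = {epsilon:.2f}, δ = {args.delta}) for α = {best_alpha:.2f}")
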
Example 25
def main():
    # Training settings
    parser = argparse.ArgumentParser(description="PyTorch MNIST Example")
    parser.add_argument(
        "-b",
        "--batch-size",
        type=int,
        default=64,
        metavar="B",
        help="input batch size for training (default: 64)",
    )
    parser.add_argument(
        "--test-batch-size",
        type=int,
        default=1024,
        metavar="TB",
        help="input batch size for testing (default: 1024)",
    )
    parser.add_argument(
        "-n",
        "--epochs",
        type=int,
        default=3,
        metavar="N",
        help="number of epochs to train (default: 14)",
    )
    parser.add_argument(
        "-r",
        "--n-runs",
        type=int,
        default=1,
        metavar="R",
        help="number of runs to average on (default: 1)",
    )
    parser.add_argument(
        "--lr",
        type=float,
        default=.1,
        metavar="LR",
        help="learning rate (default: .1)",
    )
    parser.add_argument(
        "--sigma",
        type=float,
        default=1.0,
        metavar="S",
        help="Noise multiplier (default 1.0)",
    )
    parser.add_argument(
        "-c",
        "--max-per-sample-grad_norm",
        type=float,
        default=1.0,
        metavar="C",
        help="Clip per-sample gradients to this norm (default 1.0)",
    )
    parser.add_argument(
        "--delta",
        type=float,
        default=1e-5,
        metavar="D",
        help="Target delta (default: 1e-5)",
    )
    parser.add_argument(
        "--device",
        type=str,
        default="cuda",
        help="GPU ID for this process (default: 'cuda')",
    )
    parser.add_argument(
        "--save-model",
        action="store_true",
        default=False,
        help="Save the trained model (default: false)",
    )
    parser.add_argument(
        "--disable-dp",
        action="store_true",
        default=False,
        help="Disable privacy training and just train with vanilla SGD",
    )
    parser.add_argument(
        "--data-root",
        type=str,
        default="../mnist",
        help="Where MNIST is/will be stored",
    )
    parser.add_argument(
        "--save_e",
        type=str,
        default="epsilon.png",
        help="Path of output chart",
    )
    parser.add_argument(
        "--save_l",
        type=str,
        default="loss.png",
        help="Path of output chart",
    )
    args = parser.parse_args()
    device = torch.device(args.device)

    kwargs = {"num_workers": 1, "pin_memory": True}

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(
            args.data_root,
            train=True,
            download=True,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((MNIST_MEAN, ), (MNIST_STD, ))
            ]),
        ),
        batch_size=args.batch_size,
        shuffle=True,
        **kwargs,
    )
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(
            args.data_root,
            train=False,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((MNIST_MEAN, ), (MNIST_STD, ))
            ]),
        ),
        batch_size=args.test_batch_size,
        shuffle=True,
        **kwargs,
    )
    run_results = []

    lr_list = [0.25, 0.25, 0.25, 0.15, 0.25, 0.25]
    sigma_list = [1.3, 1.1, 0.7, 1.1, 1.0, 1.1]
    c_list = [1.5, 1.0, 1.5, 1.0, 1.0, 1.5]

    fig1 = plt.figure()
    fig2 = plt.figure()
    ax1 = fig1.add_subplot(111)
    ax1.set_title('Epsilon over epochs')
    ax2 = fig2.add_subplot(111)
    ax2.set_title('Loss over epochs')
    for i in range(len(lr_list)):
        model = SampleConvNet().to(device)

        optimizer = optim.SGD(model.parameters(), lr=lr_list[i], momentum=0)
        if not args.disable_dp:
            privacy_engine = PrivacyEngine(
                model,
                batch_size=args.batch_size,
                sample_size=len(train_loader.dataset),
                alphas=[1 + x / 10.0
                        for x in range(1, 100)] + list(range(12, 64)),
                noise_multiplier=sigma_list[i],
                max_grad_norm=c_list[i],
            )
            privacy_engine.attach(optimizer)

        loss_list, epsilon_list = [], []
        for epoch in range(1, args.epochs + 1):
            l, e = train(args, model, device, train_loader, optimizer, epoch)
            loss_list.append(l)
            epsilon_list.append(e)

        color = np.random.rand(3, )
        ax1.plot(epsilon_list,
                 c=color,
                 label="lr={:.2f} σ={:.1f} c={:.1f}".format(
                     lr_list[i], sigma_list[i], c_list[i]))
        ax2.plot(loss_list,
                 c=color,
                 label="lr={:.2f} σ={:.1f} c={:.1f}".format(
                     lr_list[i], sigma_list[i], c_list[i]))
        run_results.append(test(args, model, device, test_loader))
    ax1.legend()
    fig1.savefig(args.save_e)
    ax2.legend()
    fig2.savefig(args.save_l)
    '''
    plt.sca(ax1)
    plt.title('Epsilon over epochs')
    plt.legend()
    plt.sca(ax2)
    plt.title('Loss over epochs')
    plt.legend()
    plt.savefig(args.savefile)
    '''
    '''
    if len(run_results) > 1:
        print("Accuracy averaged over {} runs: {:.2f}% ± {:.2f}%".format(
            len(run_results),
            np.mean(run_results) * 100,
            np.std(run_results) * 100
        )
        )
    '''

    repro_str = (
        f"{model.name()}_{args.lr}_{args.sigma}_"
        f"{args.max_per_sample_grad_norm}_{args.batch_size}_{args.epochs}")
    torch.save(run_results, f"run_results_{repro_str}.pt")

    if args.save_model:
        torch.save(model.state_dict(), f"mnist_cnn_{repro_str}.pt")
Example 26
def main():
    # ===== DETECT CUDA IF AVAILABLE =====
    device_name = "cuda" if torch.cuda.is_available() else "cpu"
    device = torch.device(device_name)
    print("Running on", device_name.upper())

    # ===== LOAD DATA =====
    # PIL [0, 1] images to [-1, 1] Tensors
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    trainset = torchvision.datasets.CIFAR10(root='./data',
                                            train=True,
                                            download=True,
                                            transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=4)

    testset = torchvision.datasets.CIFAR10(root='./data',
                                           train=False,
                                           download=True,
                                           transform=transform)
    testloader = torch.utils.data.DataLoader(testset,
                                             batch_size=4,
                                             shuffle=False,
                                             num_workers=4)

    args.sample_size = len(trainloader.dataset)

    classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
               'ship', 'truck')

    # ===== BUILD NET MODEL =====

    # REM: to restart from a saved model
    #net = Net() # Or another choice, then
    #net.load_state_dict(torch.load(PATH))

    # *********************************************************
    # 0: home-made net
    # 1: not pre-trained VGG16
    # 2: pre-trained VGG16
    # 3: (not frozen) pre-trained VGG16 + fully connected layer
    # 4: frozen pre-trained VGG16 + fully connected layer
    # *********************************************************

    if args.mode == 0:
        # Home-made, local definition: Net or Net2
        net = Net()
        optimizer = optim.SGD(net.parameters(),
                              lr=args.learning_rate,
                              momentum=0.9)

    elif args.mode in [1, 2]:
        net = vgg16(pretrained=(args.mode == 2))

        # Adapt output to 10 classes
        input_features = net.classifier[6].in_features
        net.classifier[6] = nn.Linear(input_features, 10)

        optimizer = optim.SGD(net.parameters(),
                              lr=args.learning_rate,
                              momentum=0.9)

    else:  # Pre-trained
        net = vgg16(pretrained=True)

        if args.mode == 4:
            # Freeze existing model parameters for training
            #  (or just the first convolutional layers, i.e. everything except "classifier")
            #for param in net.parameters():
            #    param.requires_grad = False
            for name, param in net.named_parameters():
                if name[:10] != "classifier": param.requires_grad = False

        if args.mode > 2:
            # Add some new layers to train
            # Verification (before)
            #            last_child = list(net.children())[-1]
            #            print("\tLAST CHILD:", last_child)

            # Just adapt to 10 categories
            #input_features = last_child[0].in_features
            input_features = net.classifier[6].in_features
            net.classifier[6] = nn.Linear(input_features, 10)

            dp_mod = net.classifier if args.mode == 4 else net
            optimizer = optim.SGD(dp_mod.parameters(),
                                  lr=args.learning_rate,
                                  momentum=0.9)

            # Verification
#            last_child = list(net.children())[-1]
#            print("\tLAST CHILD:", last_child)

#        else: # args.mode == 2
#            # Adapt output to 10 classes
#            input_features = net.classifier[6].in_features
#            net.classifier[6] = nn.Linear(input_features, 10)

#            optimizer = optim.SGD(net.parameters(), lr=args.learning_rate, momentum=0.9)

    net.to(device)

    # ===== DP ===============================================
    dp_mod = net.classifier if args.mode == 4 else net
    if not args.disable_dp:
        privacy_engine = PrivacyEngine(
            dp_mod,
            batch_size=args.batch_size,
            sample_size=args.sample_size,
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=args.noise,
            max_grad_norm=args.clip,
        )
        privacy_engine.attach(optimizer)


# ========================================================

    criterion = nn.CrossEntropyLoss()

    # Structure of network
    #    param_show(net)

    # ===== TRAIN MODEL =====
    print(
        f"Dataset and network are ready (mode {args.mode}), let's train our model "
        f"(×{args.epochs} epoch" + ("s" if args.epochs > 1 else "") + ")...")

    # (Just use `testloader=None` to avoid tests after each epoch)
    accur = train(net,
                  optimizer,
                  criterion,
                  trainloader,
                  args.epochs,
                  device,
                  save=False,
                  testloader=testloader,
                  args=args)

    #===== TEST MODEL =====
    #
    # Already done during training (except details)

    #    acc, categ_acc = test(net, testloader, categories=args.categories, device=device)
    #    accur.append(acc)

    #    print(f'Accuracy of the network on the 10000 test images: {acc:.2f} %')
    #    if args.categories:
    #        for i in range(10):
    #            print(f'Accuracy of {classes[i]:5s} : {categ_acc[i]:.2f} %')

    print(f'size={args.sample_size}, '
          f'bs={args.batch_size}, '
          f'nm={args.noise}, '
          f'ep={args.epochs}, '
          f'd={args.delta}, '
          f'cl={args.clip}, '
          f'lr={args.learning_rate}, '
          f'hu={args.hidden_units}, '
          f'M={args.mode}\n'
          f'acc={accur}')
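An added sanity check, not in the original script: with the mode-4 setup above, only the classifier parameters should remain trainable, and those are exactly the tensors the attached privacy engine will clip and noise. Reusing the `net` built above:

trainable = [name for name, p in net.named_parameters() if p.requires_grad]
print(f"{len(trainable)} trainable parameter tensors, e.g. {trainable[:3]}")
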
Example 27
def main():
    # Training settings
    parser = argparse.ArgumentParser(description="PyTorch MNIST Example")
    parser.add_argument(
        "-b",
        "--batch-size",
        type=int,
        default=64,
        metavar="B",
        help="input batch size for training (default: 64)",
    )
    parser.add_argument(
        "--test-batch-size",
        type=int,
        default=1024,
        metavar="TB",
        help="input batch size for testing (default: 1024)",
    )
    parser.add_argument(
        "-n",
        "--epochs",
        type=int,
        default=10,
        metavar="N",
        help="number of epochs to train (default: 14)",
    )
    parser.add_argument(
        "-r",
        "--n-runs",
        type=int,
        default=1,
        metavar="R",
        help="number of runs to average on (default: 1)",
    )
    parser.add_argument(
        "--lr",
        type=float,
        default=.1,
        metavar="LR",
        help="learning rate (default: .1)",
    )
    parser.add_argument(
        "--sigma",
        type=float,
        default=1.0,
        metavar="S",
        help="Noise multiplier (default 1.0)",
    )
    parser.add_argument(
        "-c",
        "--max-per-sample-grad_norm",
        type=float,
        default=1.0,
        metavar="C",
        help="Clip per-sample gradients to this norm (default 1.0)",
    )
    parser.add_argument(
        "--delta",
        type=float,
        default=1e-5,
        metavar="D",
        help="Target delta (default: 1e-5)",
    )
    parser.add_argument(
        "--device",
        type=str,
        default='cuda' if torch.cuda.is_available() else 'cpu',
        help="GPU ID for this process (default: 'cuda')",
    )
    parser.add_argument(
        "--save-model",
        action="store_true",
        default=True,
        help="Save the trained model",
    )
    parser.add_argument(
        "--disable-dp",
        action="store_true",
        default=False,
        help="Disable privacy training and just train with vanilla SGD",
    )
    parser.add_argument(
        "--data-root",
        type=str,
        default="../mnist",
        help="Where MNIST is/will be stored",
    )
    parser.add_argument(
        "--seed",
        type=int,
        default=0,
        help="Random seed for deterministic runs",
    )
    args = parser.parse_args()
    print(dumps(vars(args), indent=4, sort_keys=True))
    device = torch.device(args.device)
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    kwargs = {"num_workers": 1, "pin_memory": True}

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(
            args.data_root,
            train=True,
            download=True,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((MNIST_MEAN, ), (MNIST_STD, ))
            ]),
        ),
        batch_size=args.batch_size,
        shuffle=True,
        **kwargs,
    )
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(
            args.data_root,
            train=False,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((MNIST_MEAN, ), (MNIST_STD, ))
            ]),
        ),
        batch_size=args.test_batch_size,
        shuffle=True,
        **kwargs,
    )
    run_results = []
    for _ in range(args.n_runs):
        model = SampleConvNet().to(device)

        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0)
        if not args.disable_dp:
            privacy_engine = PrivacyEngine(
                model,
                batch_size=args.batch_size,
                sample_size=len(train_loader.dataset),
                alphas=[1 + x / 10.0
                        for x in range(1, 100)] + list(range(12, 64)),
                noise_multiplier=args.sigma,
                max_grad_norm=args.max_per_sample_grad_norm,
            )
            privacy_engine.attach(optimizer)
        for epoch in range(1, args.epochs + 1):
            train(args, model, device, train_loader, optimizer, epoch)
        run_results.append(test(args, model, device, test_loader))

    if len(run_results) > 1:
        print("Accuracy averaged over {} runs: {:.2f}% ± {:.2f}%".format(
            len(run_results),
            np.mean(run_results) * 100,
            np.std(run_results) * 100))

    repro_str = (
        f"{model.name()}_{args.lr}_{args.sigma}_"
        f"{args.max_per_sample_grad_norm}_{args.batch_size}_{args.epochs}")
    torch.save(run_results, f"run_results_{repro_str}.pt")

    if args.save_model:
        torch.save(model.state_dict(), f"mnist_cnn_{repro_str}.pt")
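A note added for context: the default target delta of 1e-5 follows the usual guideline that δ should stay below the inverse of the training-set size; MNIST has 60,000 training images, so 1/N ≈ 1.7e-5. A one-line check against the loader built above:

assert args.delta < 1.0 / len(train_loader.dataset), "δ should stay below 1/N"
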
Example 28
def main_worker(gpu, ngpus_per_node, args):
    global best_acc1
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(
            backend=args.dist_backend,
            init_method=args.dist_url,
            world_size=args.world_size,
            rank=args.rank,
        )
    # create model: resnet 18
    # since our differential privacy engine does not support BatchNormXd
    # we need to replace all such blocks with DP-aware normalisation modules
    model = utils.convert_batchnorm_modules(models.resnet18(num_classes=10))
    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(
                (args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    optimizer = torch.optim.SGD(
        model.parameters(),
        args.lr,
        momentum=args.momentum,
        weight_decay=args.weight_decay,
    )

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = "cuda:{}".format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint["epoch"]
            best_acc1 = checkpoint["best_acc1"]
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint["state_dict"])
            optimizer.load_state_dict(checkpoint["optimizer"])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint["epoch"]))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, "train")
    valdir = os.path.join(args.data, "val")
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]),
    )

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=(train_sampler is None),
        num_workers=args.workers,
        pin_memory=True,
        sampler=train_sampler,
    )

    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(
            valdir,
            transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize,
            ]),
        ),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True,
    )

    if not args.disable_dp:
        print("PRIVACY ENGINE ON")
        privacy_engine = PrivacyEngine(
            model,
            batch_size=args.batch_size,
            sample_size=len(train_dataset),
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=args.sigma,
            max_grad_norm=args.max_per_sample_grad_norm,
            **clipping,  # additional clipping options; the `clipping` dict is defined elsewhere in the full script
        )
        privacy_engine.attach(optimizer)
    else:
        print("PRIVACY ENGINE OFF")

    if args.evaluate:
        validate(val_loader, model, criterion, args)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args)

        # evaluate on validation set
        acc1 = validate(val_loader, model, criterion, args)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            save_checkpoint(
                {
                    "epoch": epoch + 1,
                    "arch": "SampleConvNet",
                    "state_dict": model.state_dict(),
                    "best_acc1": best_acc1,
                    "optimizer": optimizer.state_dict(),
                },
                is_best,
                filename=args.checkpoint_file + ".tar",
            )
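Not part of the original worker: a quick check, using only the public BatchNorm classes, that `utils.convert_batchnorm_modules` really removed every batch-norm layer from the ResNet-18 before the privacy engine was attached:

bn_types = (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)
assert not any(isinstance(m, bn_types) for m in model.modules()), (
    "model still contains BatchNorm modules")
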
Example 29
def main():
    args = parser.parse_args()
    device = torch.device(args.device)

    all_filenames = glob.glob(args.training_path)
    all_letters = string.ascii_letters + " .,;'#"
    n_letters = len(all_letters)

    category_lines, all_categories, n_categories = build_category_lines(
        all_filenames, all_letters
    )
    category_lines_train, category_lines_val = split_data_train_eval(
        category_lines, args.train_eval_split
    )
    rnn = CharNNClassifier(
        n_letters, args.n_hidden, n_categories, n_letters, args.batch_size
    ).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(rnn.parameters(), lr=args.learning_rate)

    if not args.disable_dp:
        privacy_engine = PrivacyEngine(
            rnn,
            batch_size=args.batch_size,
            sample_size=get_dataset_size(category_lines_train),
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=args.sigma,
            max_grad_norm=args.max_per_sample_grad_norm,
            batch_first=False,
        )
        privacy_engine.attach(optimizer)

    # Measure time elapsed for profiling training
    def time_since(since):
        now = time.time()
        s = now - since
        m = math.floor(s / 60)
        s -= m * 60
        return "%dm %ds" % (m, s)

    # Keep track of losses for tracking
    current_loss = 0

    start_time = time.time()
    for iteration in tqdm(range(1, args.iterations + 1)):
        # Get a random training input and target batch
        _, _, category_tensors, line_tensors = get_random_batch(
            category_lines_train,
            args.batch_size,
            all_categories,
            all_letters,
            n_letters,
            args,
            device,
        )
        output, loss = train(
            rnn, criterion, optimizer, category_tensors, line_tensors, device
        )
        current_loss += loss

        # Print iteration number, loss, name and guess
        if iteration % print_every == 0:
            acc = get_eval_metrics(
                rnn,
                category_lines_val,
                all_categories,
                all_letters,
                n_letters,
                args.batch_size,
                args.max_seq_length,
                device,
            )
            time_elapsed = time_since(start_time)

            if not args.disable_dp:
                epsilon, best_alpha = optimizer.privacy_engine.get_privacy_spent(
                    args.delta
                )
                print(
                    f"Iteration={iteration} / Time elapsed: {time_elapsed} / Loss={loss:.4f} / "
                    f"Eval Accuracy:{acc*100:.2f} / "
                    f"Ɛ = {epsilon:.2f}, 𝛿 = {args.delta:.2f}) for α = {best_alpha:.2f}"
                )
            else:
                print(
                    f"Iteration={iteration} / Time elapsed: {time_elapsed} / Loss={loss:.4f} / "
                    f"Eval Accuracy:{acc*100:.2f}"
                )
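The engine above is created with batch_first=False because the character batches are assumed to be laid out as (seq_len, batch, n_letters), so the per-sample (per-name) dimension is dimension 1 rather than 0. A minimal shape check under that assumption, reusing `line_tensors` from the training loop:

assert line_tensors.shape[1] == args.batch_size  # dim 1 is the batch dimension when batch_first=False
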
Example 30
    def train(self, data):
        if isinstance(data, pd.DataFrame):
            for col in data.columns:
                data[col] = pd.to_numeric(data[col], errors='ignore')
            self.pd_cols = data.columns
            self.pd_index = data.index
            data = data.to_numpy()
        elif not isinstance(data, np.ndarray):
            raise ValueError("Data must be a numpy array or pandas dataframe")

        dataset = TensorDataset(
            torch.from_numpy(data.astype('float32')).to(self.device))
        dataloader = DataLoader(dataset,
                                batch_size=self.batch_size,
                                shuffle=True,
                                drop_last=True)

        self.generator = Generator(self.latent_dim,
                                   data.shape[1],
                                   binary=self.binary).to(self.device)
        discriminator = Discriminator(data.shape[1]).to(self.device)
        optimizer_d = optim.Adam(discriminator.parameters(), lr=4e-4)

        privacy_engine = PrivacyEngine(
            discriminator,
            batch_size=self.batch_size,
            sample_size=len(data),
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=3.5,
            max_grad_norm=1.0,
            clip_per_layer=True)

        privacy_engine.attach(optimizer_d)
        optimizer_g = optim.Adam(self.generator.parameters(), lr=1e-4)

        criterion = nn.BCELoss()

        for epoch in range(self.epochs):
            for i, batch in enumerate(dataloader):
                discriminator.zero_grad()

                real_data = batch[0].to(self.device)

                # train with fake data
                noise = torch.randn(self.batch_size,
                                    self.latent_dim,
                                    1,
                                    1,
                                    device=self.device)
                noise = noise.view(-1, self.latent_dim)
                fake_data = self.generator(noise)
                label_fake = torch.full((self.batch_size, ),
                                        0.0,
                                        device=self.device)
                output = discriminator(fake_data.detach())
                loss_d_fake = criterion(output, label_fake)
                loss_d_fake.backward()
                optimizer_d.step()

                # train with real data
                label_true = torch.full((self.batch_size, ),
                                        1.0,
                                        device=self.device)
                output = discriminator(real_data.float())
                loss_d_real = criterion(output, label_true)
                loss_d_real.backward()
                optimizer_d.step()

                loss_d = loss_d_real + loss_d_fake

                # Adaptive per-layer clipping: the engine was built with
                # clip_per_layer=True, so max_grad_norm may be a list with one
                # threshold per parameter; reset it to the norms just observed
                # on the discriminator's gradients.
                max_grad_norm = []
                for p in discriminator.parameters():
                    param_norm = p.grad.data.norm(2).item()
                    max_grad_norm.append(param_norm)

                privacy_engine.max_grad_norm = max_grad_norm

                # train generator
                self.generator.zero_grad()
                label_g = torch.full((self.batch_size, ),
                                     1.0,
                                     device=self.device)
                output_g = discriminator(fake_data)
                loss_g = criterion(output_g, label_g)
                loss_g.backward()
                optimizer_g.step()

                # manually clear gradients
                for p in discriminator.parameters():
                    if hasattr(p, "grad_sample"):
                        del p.grad_sample

                if self.delta is None:
                    # default δ to 1/N, where N is the number of training rows
                    self.delta = 1 / len(dataset)

                eps, best_alpha = optimizer_d.privacy_engine.get_privacy_spent(
                    self.delta)

            if self.epsilon < eps:
                break