Beispiel #1
0
 def from_file(filename):
     """
     Loads a DDWitness object from a file with given name.

     Args:
         filename: path of a file containing a pickled DDWitness

     Returns:
         The unpickled DDWitness object.

     Raises:
         TypeError: if the unpickled object is not a DDWitness.
     """
     log.debug("Loading result from file '%s'", filename)
     # NOTE: unpickling can execute arbitrary code, so this must only be used
     # on files written by this program, never on untrusted input.
     with open(filename, "rb") as f:
         obj = pickle.load(f)
     # explicit check rather than `assert`, which is removed under `python -O`
     if not isinstance(obj, DDWitness):
         raise TypeError(
             "expected a pickled DDWitness in '{}', got {}".format(
                 filename, type(obj).__name__))
     return obj
Beispiel #2
0
 def _transform(self, x):
     """
     Perform feature transformation.
     """
     if self.feature_transform is not None:
         log.debug("Transforming features...")
         x = self.feature_transform.transform(x)
         log.debug("Done transforming features")
     return x
Beispiel #3
0
 def _get_tensorboard_log_dir(self):
     """
     Build the path of a tensorboard log directory below self.tensorboard_dir.

     The directory name is made of the current time (as returned by
     datetime.now()), 10 random ASCII letters and self.label, joined by
     underscores. This method only computes the path; it does not create
     the directory.

     Returns:
         The path of the directory.
     """
     timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

     # random suffix (presumably to keep apart directories of runs that
     # start within the same second)
     suffix = ''.join(
         [random.choice(string.ascii_letters) for _ in range(10)])

     dir_name = '_'.join((timestamp, suffix, str(self.label)))
     path = os.path.join(self.tensorboard_dir, dir_name)
     log.debug('Tensorboard directory at ' + path)
     return path
Beispiel #4
0
    def to_tmp_file(self) -> str:
        """
        Stores the result to a temporary file.

        The file is created with tempfile.mkstemp in the directory returned
        by get_output_directory("tmp") and contains this object in pickled
        form. The file is not deleted automatically; removing it is up to
        the caller. If writing fails, the incomplete file is removed (best
        effort) and the original exception is re-raised.

        Returns:
            The path of the created temporary file.
        """
        tmp_dir = get_output_directory("tmp")
        fd, filename = tempfile.mkstemp(dir=tmp_dir)
        log.debug("Storing result to file '%s'", filename)
        try:
            with os.fdopen(fd, "wb") as f:
                pickle.dump(self, f)
        except BaseException:
            # do not leave an empty or partially written file behind
            try:
                os.remove(filename)
            except OSError:
                # best-effort cleanup only: the error that matters is the
                # one raised while writing, which is re-raised below
                pass
            raise
        return filename
Beispiel #5
0
    def best_attack(self, a1, a2) -> MlAttack:
        """
        Constructs an attack for the given input pair a1, a2.

        A classifier is trained for a1, a2. Then self.config.n outputs of the
        mechanism on input a2 are sampled (in chunks), the classifier's
        probabilities for these outputs are collected and sorted in
        descending order, and DPSniper._find_threshold selects a threshold
        from them.

        Args:
            a1: 1d array representing the first input
            a2: 1d array representing the second input

        Returns:
            The constructed MlAttack
        """
        log.debug("Searching best attack for mechanism %s, classifier %s...",
                  type(self.mechanism).__name__,
                  type(self.classifier_factory).__name__)

        classifier = self._train_classifier(a1, a2)

        def predict_on_fresh_samples(n_samples):
            # sample outputs of the mechanism on input a2
            samples = self.mechanism.m(a2, n_samples)
            if len(samples.shape) == 1:
                # turn a flat array into shape (n_samples, 1)
                samples = np.atleast_2d(samples).T
            # classifier probabilities for the sampled outputs
            return classifier.predict_probabilities(samples)

        with time_measure("time_determine_threshold"):
            log.debug("Determining threshold...")

            # TODO: maybe parallelize this loop?
            per_part = []
            for part_size in split_into_parts(self.config.n,
                                              self.config.n_processes):
                batch_sizes = split_by_batch_size(
                    part_size, self.config.prediction_batch_size)
                per_part.append(np.concatenate(
                    [predict_on_fresh_samples(size) for size in batch_sizes]))

            probabilities = np.concatenate(per_part)
            # sorting the reversed view ascending in-place leaves the
            # array itself in descending order
            probabilities[::-1].sort()

            assert (probabilities.shape[0] == self.config.n)

        # find optimal threshold
        log.debug("Finding optimal threshold...")
        with time_measure("time_dp_distinguisher_find_threshold"):
            n_probabilities = probabilities.shape[0]
            thresh, q = DPSniper._find_threshold(
                probabilities, self.config.c * n_probabilities)
        log.debug("Selected t = %f, q = %f", thresh, q)

        return MlAttack(classifier, thresh, q)
Beispiel #6
0
    def _train_one_batch(self, batch_idx: int, x_train, y_train, x_test,
                         y_test, writer, optimizer, scheduler):
        """
        Runs self.epochs optimizer steps on one training batch and logs the
        resulting losses.

        Args:
            batch_idx: index of the batch, used to compute the global step
                index (the formula (batch_idx - 1) * epochs + epoch suggests
                batches are numbered from 1 - confirm with the caller)
            x_train: training inputs passed to self.model
            y_train: training labels passed to self.criterion
            x_test: test inputs, or None to skip the test metrics
            y_test: test labels, only used if x_test is not None
            writer: object whose add_scalar(tag, value, step) receives the
                metrics
            optimizer: optimizer whose step() is called with a closure
            scheduler: scheduler whose step() is called after each optimizer
                step, or None
        """
        for epoch in range(self.epochs):
            # not really an "epoch" as it does not loop over the whole data set, but only over one batch

            # closure function required for optimizers such as LBFGS that need to compute
            # the gradient of the loss themselves
            def closure():
                # initialize gradients
                optimizer.zero_grad()

                # compute loss (forward pass)
                y_pred = self.model(x_train).squeeze()
                loss = self._regularized(self.criterion(y_pred, y_train))

                # backward pass
                loss.backward()
                return loss

            optimizer.step(closure)
            if scheduler is not None:
                scheduler.step()

            # recompute loss for logging (this also resets and recomputes
            # the gradients, as the closure does both)
            train_loss = closure().item()

            # logging
            step_idx = (batch_idx - 1) * self.epochs + epoch
            writer.add_scalar('Loss/train', train_loss, step_idx)
            log.debug(
                'Batch {}, epoch {} (global step {}): train loss: {}'.format(
                    batch_idx, epoch, step_idx, train_loss))

            if x_test is not None:
                # Use the same squeezed labels for loss and accuracy: the
                # predictions are squeezed, so subtracting labels of shape
                # (n, 1) would broadcast to (n, n) and distort the accuracy.
                y_test_flat = y_test.squeeze()

                # pure evaluation, no gradients needed
                with torch.no_grad():
                    # compute test loss
                    y_pred_test = self.model(x_test).squeeze()
                    loss_test = self.criterion(y_pred_test, y_test_flat)

                    # compute testing accuracy
                    accuracy_test = 1 - torch.mean(
                        torch.abs(torch.round(y_pred_test) - y_test_flat))

                writer.add_scalar('Loss/test', loss_test.item(), step_idx)
                writer.add_scalar('Accuracy/test', accuracy_test.item(),
                                  step_idx)
Beispiel #7
0
    def _generate_data_batch(self, a1, a2, n) -> Tuple:
        """
        Samples a labeled training batch of size 2n from the mechanism.

        Args:
            a1: first input; its n samples get label 0
            a2: second input; its n samples get label 1
            n: number of samples to draw per input

        Returns:
            A tuple (x, y): x holds the samples for a1 followed by the
            samples for a2 (one sample per row), y is the 1d array of labels.
        """
        log.debug("Generating training data batch of size 2*%d...", n)

        samples_a1 = self.mechanism.m(a1, n)
        samples_a2 = self.mechanism.m(a2, n)
        if len(samples_a1.shape) == 1:
            # flat outputs: bring both sample arrays to shape (n_samples, 1)
            samples_a1, samples_a2 = (
                np.atleast_2d(s).T for s in (samples_a1, samples_a2))

        # stack along the sample axis: rows = samples, columns = features
        x = np.concatenate((samples_a1, samples_a2), axis=0)

        # labels: n zeros (for a1) followed by n ones (for a2)
        y = np.concatenate((np.zeros(n), np.ones(n)))

        return x, y
Beispiel #8
0
    def compute_eps_estimate(self, a1, a2, attack: Attack) -> (float, float):
        """
        Estimates eps(a1, a2, attack) using samples.

        The probability estimates for a1 and a2 are obtained from
        self.pr_estimator. If the estimate for a1 is smaller than the one for
        a2, the two are swapped when self.allow_swap is set; otherwise a
        warning is logged and they are used as they are.

        Args:
            a1: the first input
            a2: the second input
            attack: the attack passed to the probability estimator

        Returns:
            a tuple (eps, lcb), where eps is the eps estimate and lcb is a lower confidence bound for eps
        """
        pr_first = self.pr_estimator.compute_pr_estimate(a1, attack)
        pr_second = self.pr_estimator.compute_pr_estimate(a2, attack)
        log.debug("p1=%f, p2=%f", pr_first, pr_second)
        log.data("p1", pr_first)
        log.data("p2", pr_second)

        if pr_first < pr_second and self.allow_swap:
            pr_first, pr_second = pr_second, pr_first
            log.debug("swapped probabilitites p1, p2")
        elif pr_first < pr_second:
            log.warning("probability p1 < p2 for eps estimation")

        return (self._compute_eps(pr_first, pr_second),
                self._compute_lcb(pr_first, pr_second))
Beispiel #9
0
    def _compute_results_for_all_inputs(self):
        """
        Runs DDSearch._one_input_pair for every input pair and collects the
        results.

        One task (self, a1, a2) is built per pair from self.input_generator
        and handed to the_parallel_executor. Each returned file name is
        loaded with DDWitness.from_file and the file is removed afterwards.

        Returns:
            The list of loaded DDWitness objects, in the order of the
            returned file names.
        """
        log.debug("generating inputs...")
        tasks = []
        for (a1, a2) in self.input_generator.get_input_pairs():
            log.debug("%s, %s", a1, a2)
            tasks.append((self, a1, a2))

        log.debug("submitting parallel tasks...")
        result_files = the_parallel_executor.execute(DDSearch._one_input_pair,
                                                     tasks)
        log.debug("parallel tasks done!")

        def load_and_delete(path):
            # read the stored result, then drop the file that carried it
            witness = DDWitness.from_file(path)
            os.remove(path)
            return witness

        return [load_and_delete(path) for path in result_files]
Beispiel #10
0
    def _train_classifier(self, a1, a2) -> StableClassifier:
        """
        Creates a new classifier and trains it for inputs a1, a2.

        The training data is generated lazily in batches: self.config.n_train
        is split by self.config.training_batch_size, and each batch comes
        from self._generate_data_batch.

        Args:
            a1: the first input
            a2: the second input

        Returns:
            The trained classifier.
        """
        log.debug("Creating classifier...")
        classifier = self.classifier_factory.create()

        def training_batches():
            # generator, so each batch is only produced when it is consumed
            batch_sizes = split_by_batch_size(self.config.n_train,
                                              self.config.training_batch_size)
            for batch_size in batch_sizes:
                yield self._generate_data_batch(a1, a2, batch_size)

        log.debug("Training classifier...")
        with time_measure("time_dp_distinguisher_train"):
            batches = training_batches()
            classifier.train(batches)
        log.debug("Done training")

        return classifier
Beispiel #11
0
    def run(self):
        """
        Runs the dd-search followed by a final eps estimation and logs the
        outcome.

        The result of the search (inputs, attack, eps and its lower bound) is
        written to the log; nothing is returned.
        """
        log.info("using configuration %s", self.config)

        attack_opt = DPSniper(self.mechanism, self.classifier_factory, self.config)

        with time_measure("time_dd_search"):
            log.debug("running dd-search...")
            search = DDSearch(self.mechanism, attack_opt, self.input_pair_sampler, self.config)
            witness = search.run()
        log.debug("finished dd-search, preliminary eps=%f", witness.eps)

        with time_measure("time_final_estimate_eps"):
            log.debug("computing final eps estimate...")
            witness.compute_eps_high_precision(self.mechanism, self.config)

        log.info("done!")
        summary = (
            ("> a1      = {}", witness.a1),
            ("> a2      = {}", witness.a2),
            ("> attack  = {}", witness.attack),
            ("> eps     = {}", witness.eps),
            ("> eps lcb = {}", witness.lower_bound),
        )
        for template, value in summary:
            log.info(template.format(value))

        log.data("eps", witness.eps)
        log.data("eps_lcb", witness.lower_bound)
Beispiel #12
0
    def _one_input_pair(task):
        """
        Handles a single input pair: selects an attack, estimates eps for it
        and stores the result in a temporary file.

        Args:
            task: a tuple (optimizer, a1, a2), where optimizer provides the
                attributes attack_optimizer and pr_estimator

        Returns:
            The name of the temporary file holding the resulting DDWitness.
        """
        search, a1, a2 = task
        eps_estimator = EpsEstimator(search.pr_estimator)

        log.debug("selecting attack...")
        with time_measure("time_dp_distinguisher"):
            attack = search.attack_optimizer.best_attack(a1, a2)
        log.debug("best attack: %s", attack)

        witness = DDWitness(a1, a2, attack)
        log.debug("computing estimate for eps...")
        with time_measure("time_estimate_eps"):
            witness.compute_eps_using_estimator(eps_estimator)
        log.debug("current eps: %s", witness.eps)
        log.data("eps_for_sample", witness.eps)

        log.debug("storing result...")
        result_path = witness.to_tmp_file()

        log.debug("done!")
        return result_path