def from_file(filename):
    """
    Loads a DDWitness object from a file with given name.

    Args:
        filename: path of a file containing a pickled DDWitness

    Returns:
        The unpickled DDWitness.

    Raises:
        TypeError: if the unpickled object is not a DDWitness.
    """
    log.debug("Loading result from file '%s'", filename)
    # SECURITY: pickle.load can execute arbitrary code while deserializing;
    # only call this on files produced by a trusted source.
    with open(filename, "rb") as f:
        obj = pickle.load(f)
    # explicit check instead of `assert`, which is stripped under `python -O`
    if not isinstance(obj, DDWitness):
        raise TypeError(
            "expected a pickled DDWitness in '{}', got {}".format(
                filename, type(obj).__name__))
    return obj
def _transform(self, x):
    """
    Apply the configured feature transformation to x.

    Returns x unchanged when self.feature_transform is None; otherwise
    returns the result of self.feature_transform.transform(x).
    """
    transformer = self.feature_transform
    if transformer is None:
        # nothing configured: pass the input through untouched
        return x

    log.debug("Transforming features...")
    transformed = transformer.transform(x)
    log.debug("Done transforming features")
    return transformed
def _get_tensorboard_log_dir(self):
    """
    Build the path of a tensorboard log directory for this instance.

    The directory name joins, with underscores, the current timestamp,
    10 random ASCII letters and self.label; it is placed below
    self.tensorboard_dir. The directory itself is not created here.

    Returns:
        The composed directory path.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    # random suffix keeps directories apart when the timestamp and label match
    suffix = ''.join(random.choice(string.ascii_letters) for _ in range(10))
    dir_name = '_'.join(map(str, (timestamp, suffix, self.label)))
    path = os.path.join(self.tensorboard_dir, dir_name)
    log.debug('Tensorboard directory at ' + path)
    return path
def to_tmp_file(self) -> str:
    """
    Pickles this object into a newly created temporary file.

    The file is created via tempfile.mkstemp inside the directory returned
    by get_output_directory("tmp").

    Returns:
        The path of the created temporary file.
    """
    handle, path = tempfile.mkstemp(dir=get_output_directory("tmp"))
    log.debug("Storing result to file '%s'", path)
    # wrap the raw descriptor so it gets closed when the block exits
    with os.fdopen(handle, "wb") as out:
        pickle.dump(self, out)
    return path
def best_attack(self, a1, a2) -> MlAttack:
    """
    Runs the optimizer to construct an attack for given input pair a1, a2.

    A classifier is trained via _train_classifier(a1, a2). Then
    self.config.n mechanism outputs are sampled for a2 (in chunks), scored
    with classifier.predict_probabilities, sorted in descending order and
    passed to DPSniper._find_threshold together with the target
    self.config.c * n to select the threshold t and tie-break value q.

    Args:
        a1: 1d array representing the first input
        a2: 1d array representing the second input

    Returns:
        The constructed MlAttack
    """
    # NOTE(review): the indentation of this function was lost in the source
    # under review; the nesting of the two time_measure blocks below is a
    # reconstruction (threshold search assumed to run outside the sampling
    # timer) -- confirm against the original file.
    log.debug("Searching best attack for mechanism %s, classifier %s...",
              type(self.mechanism).__name__,
              type(self.classifier_factory).__name__)

    classifier = self._train_classifier(a1, a2)

    with time_measure("time_determine_threshold"):
        log.debug("Determining threshold...")

        # TODO: maybe parallelize this loop?
        probabilities = []
        # outer split: one part per process; inner split: prediction batches
        for parallel_size in split_into_parts(self.config.n, self.config.n_processes):
            sequential_probabilities = []
            for sequential_size in split_by_batch_size(
                    parallel_size, self.config.prediction_batch_size):
                # generate samples from a2
                b_new = self.mechanism.m(a2, sequential_size)
                if len(b_new.shape) == 1:
                    # make sure b_new has shape (n_samples, 1)
                    b_new = np.atleast_2d(b_new).T

                # compute Pr[a1 | M(a1) = b_new]
                probabilities_new = classifier.predict_probabilities(b_new)

                # wrap up
                sequential_probabilities.append(probabilities_new)

            # merge the batches of this part into a single array
            sequential_probabilities = np.concatenate(
                sequential_probabilities)
            probabilities.append(sequential_probabilities)

        probabilities = np.concatenate(probabilities)
        # sorting the reversed view ascending leaves the array itself descending
        probabilities[::-1].sort()  # sorts descending, in-place
        # sanity check: exactly one probability per requested sample
        assert (probabilities.shape[0] == self.config.n)

    # find optimal threshold
    log.debug("Finding optimal threshold...")
    with time_measure("time_dp_distinguisher_find_threshold"):
        thresh, q = DPSniper._find_threshold(
            probabilities, self.config.c * probabilities.shape[0])
    log.debug("Selected t = %f, q = %f", thresh, q)

    return MlAttack(classifier, thresh, q)
def _train_one_batch(self, batch_idx: int, x_train, y_train, x_test, y_test, writer, optimizer, scheduler):
    """
    Runs self.epochs optimizer steps on a single training batch and logs
    the training loss (and, if test data is given, test loss and accuracy).

    Args:
        batch_idx: index of the batch; the global step is computed as
            (batch_idx - 1) * self.epochs + epoch, which suggests a 1-based
            index -- TODO confirm with the caller
        x_train: training inputs passed to self.model
        y_train: training targets passed to self.criterion
        x_test: test inputs, or None to skip test evaluation
        y_test: test targets (only used when x_test is not None)
        writer: object providing add_scalar(tag, value, step)
        optimizer: optimizer whose step() accepts a closure
        scheduler: learning rate scheduler stepped once per epoch, or None
    """
    # closure function required for optimizers such as LBFGS that need to
    # compute the gradient of the loss themselves; it does not depend on the
    # epoch, so it is defined once outside the loop
    def closure():
        # initialize gradients
        optimizer.zero_grad()
        # compute loss (forward pass)
        y_pred = self.model(x_train).squeeze()
        loss = self._regularized(self.criterion(y_pred, y_train))
        # backward pass
        loss.backward()
        return loss

    # not really "epochs": each iteration only revisits this one batch
    for epoch in range(self.epochs):
        optimizer.step(closure)
        if scheduler is not None:
            scheduler.step()

        # recompute loss for logging
        loss = closure()

        # logging
        step_idx = (batch_idx - 1) * self.epochs + epoch
        writer.add_scalar('Loss/train', loss.item(), step_idx)
        log.debug(
            'Batch {}, epoch {} (global step {}): train loss: {}'.format(
                batch_idx, epoch, step_idx, loss.item()))

        if x_test is not None:
            # evaluation only: no gradients are needed for these values
            with torch.no_grad():
                y_pred_test = self.model(x_test).squeeze()
                # squeeze the targets once and use them for BOTH metrics, so
                # a (n, 1) target cannot broadcast against (n,) predictions
                y_true_test = y_test.squeeze()

                # compute test loss
                loss_test = self.criterion(y_pred_test, y_true_test)

                # compute testing accuracy
                accuracy_test = 1 - torch.mean(
                    torch.abs(torch.round(y_pred_test) - y_true_test))

            writer.add_scalar('Loss/test', loss_test.item(), step_idx)
            writer.add_scalar('Accuracy/test', accuracy_test.item(), step_idx)
def _generate_data_batch(self, a1, a2, n) -> Tuple:
    """
    Generates a training data batch of size 2n (n samples for each input a1 and a2).

    Returns:
        a tuple (x, y): x stacks the samples for a1 on top of the samples
        for a2; y is a 1d label array with 0 for the first n entries and 1
        for the remaining n.
    """
    log.debug("Generating training data batch of size 2*%d...", n)

    samples_a1 = self.mechanism.m(a1, n)
    samples_a2 = self.mechanism.m(a2, n)

    is_flat = len(samples_a1.shape) == 1
    if is_flat:
        # turn flat sample vectors into columns of shape (n_samples, 1)
        samples_a1 = np.atleast_2d(samples_a1).T
        samples_a2 = np.atleast_2d(samples_a2).T

    # rows = samples, columns = features
    features = np.concatenate((samples_a1, samples_a2), axis=0)

    # label 0 for the a1 half, label 1 for the a2 half
    labels = np.concatenate((np.zeros(n), np.ones(n)))
    return features, labels
def compute_eps_estimate(self, a1, a2, attack: Attack) -> (float, float):
    """
    Estimates eps(a1, a2, attack) using samples.

    The attack probabilities for a1 and a2 are estimated via
    self.pr_estimator. If the first is smaller than the second, they are
    swapped when self.allow_swap is set; otherwise a warning is logged.

    Returns:
        a tuple (eps, lcb), where eps is the eps estimate and lcb is a
        lower confidence bound for eps
    """
    estimator = self.pr_estimator
    p1 = estimator.compute_pr_estimate(a1, attack)
    p2 = estimator.compute_pr_estimate(a2, attack)

    log.debug("p1=%f, p2=%f", p1, p2)
    log.data("p1", p1)
    log.data("p2", p2)

    wrong_order = p1 < p2
    if wrong_order and self.allow_swap:
        p1, p2 = p2, p1
        log.debug("swapped probabilitites p1, p2")
    elif wrong_order:
        log.warning("probability p1 < p2 for eps estimation")

    return self._compute_eps(p1, p2), self._compute_lcb(p1, p2)
def _compute_results_for_all_inputs(self):
    """
    Evaluates every input pair from self.input_generator in parallel.

    One task (self, a1, a2) per input pair is handed to
    the_parallel_executor, which runs DDSearch._one_input_pair on each.
    Every returned result file is loaded as a DDWitness and then deleted.

    Returns:
        The list of loaded DDWitness objects, in the order of the result
        files returned by the executor.
    """
    log.debug("generating inputs...")
    tasks = []
    for a1, a2 in self.input_generator.get_input_pairs():
        log.debug("%s, %s", a1, a2)
        tasks.append((self, a1, a2))

    log.debug("submitting parallel tasks...")
    result_files = the_parallel_executor.execute(DDSearch._one_input_pair, tasks)
    log.debug("parallel tasks done!")

    witnesses = []
    for path in result_files:
        witnesses.append(DDWitness.from_file(path))
        # the file is no longer needed once its content is loaded
        os.remove(path)
    return witnesses
def _train_classifier(self, a1, a2) -> StableClassifier:
    """
    Creates a fresh classifier and trains it for inputs a1, a2.

    Training data is produced lazily: self.config.n_train is split by
    self.config.training_batch_size and one data batch is generated per
    resulting size.

    Returns:
        The trained classifier.
    """
    def training_batches():
        # generator function: nothing runs before the first batch is requested
        batch_sizes = split_by_batch_size(self.config.n_train,
                                          self.config.training_batch_size)
        yield from (self._generate_data_batch(a1, a2, batch_size)
                    for batch_size in batch_sizes)

    log.debug("Creating classifier...")
    model = self.classifier_factory.create()

    log.debug("Training classifier...")
    with time_measure("time_dp_distinguisher_train"):
        model.train(training_batches())
    log.debug("Done training")

    return model
def run(self):
    """
    Runs the full pipeline: DD-Search with a DPSniper attack optimizer,
    followed by a high-precision eps estimate for the found witness.

    The resulting inputs, attack, eps and lower bound are written to the
    log; nothing is returned.
    """
    log.info("using configuration %s", self.config)

    sniper = DPSniper(self.mechanism, self.classifier_factory, self.config)

    with time_measure("time_dd_search"):
        log.debug("running dd-search...")
        search = DDSearch(self.mechanism, sniper, self.input_pair_sampler, self.config)
        witness = search.run()
    log.debug("finished dd-search, preliminary eps=%f", witness.eps)

    with time_measure("time_final_estimate_eps"):
        log.debug("computing final eps estimate...")
        witness.compute_eps_high_precision(self.mechanism, self.config)

    log.info("done!")
    # (message template, witness attribute) for each summary line
    summary = (
        ("> a1 = {}", "a1"),
        ("> a2 = {}", "a2"),
        ("> attack = {}", "attack"),
        ("> eps = {}", "eps"),
        ("> eps lcb = {}", "lower_bound"),
    )
    for template, attribute in summary:
        log.info(template.format(getattr(witness, attribute)))

    log.data("eps", witness.eps)
    log.data("eps_lcb", witness.lower_bound)
def _one_input_pair(task):
    """
    Processes a single input pair: selects the best attack, estimates eps
    for it and stores the resulting witness in a temporary file.

    Args:
        task: a tuple (optimizer, a1, a2)

    Returns:
        The name of the file the DDWitness was stored to.
    """
    optimizer, a1, a2 = task
    eps_estimator = EpsEstimator(optimizer.pr_estimator)

    log.debug("selecting attack...")
    with time_measure("time_dp_distinguisher"):
        attack = optimizer.attack_optimizer.best_attack(a1, a2)
    log.debug("best attack: %s", attack)

    witness = DDWitness(a1, a2, attack)

    log.debug("computing estimate for eps...")
    with time_measure("time_estimate_eps"):
        witness.compute_eps_using_estimator(eps_estimator)
    log.debug("current eps: %s", witness.eps)
    log.data("eps_for_sample", witness.eps)

    log.debug("storing result...")
    stored_at = witness.to_tmp_file()
    log.debug("done!")
    return stored_at