def to_tmp_file(self) -> str:
    """
    Stores the result to a temporary file.

    Returns:
        The path of the created temporary file.
    """
    tmp_dir = get_output_directory("tmp")
    fd, filename = tempfile.mkstemp(dir=tmp_dir)
    log.debug("Storing result to file '%s'", filename)
    with os.fdopen(fd, "wb") as f:
        pickle.dump(self, f)
    return filename
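# Usage sketch (not part of the original module): reading back a result written by
# to_tmp_file. The helper name `from_tmp_file` is hypothetical; any caller can
# unpickle the returned path directly as shown here.
def from_tmp_file(filename: str):
    """Loads a previously stored result and removes the temporary file."""
    with open(filename, "rb") as f:
        obj = pickle.load(f)
    os.remove(filename)  # the file is only needed for the hand-off
    return obj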
def test_pickle(self):
    model = MultiLayerPerceptron(in_dimensions=2,
                                 optimizer_factory=LBFGSOptimizerFactory(),
                                 hidden_sizes=(50, 50))
    model.train(training_batch_generator())
    probs1 = model.predict_probabilities(x_test)

    # round-trip the trained model through pickle
    # (tempfile.mkstemp avoids the race condition of the deprecated tempfile.mktemp)
    fd, filename = tempfile.mkstemp(dir=get_output_directory("tmp"))
    with os.fdopen(fd, "wb") as f:
        pickle.dump(model, f)
    with open(filename, "rb") as f:
        obj = pickle.load(f)
    os.remove(filename)

    assert isinstance(obj, MultiLayerPerceptron)

    # the restored model must produce identical predictions
    probs2 = obj.predict_probabilities(x_test)
    np.testing.assert_array_equal(probs1, probs2)
def run_with_postprocessing(n_processes: int, out_dir: str, only_mechanism=None, postfix=""):
    log.configure("WARNING")
    set_output_directory(out_dir)
    logs_dir = get_output_directory("logs")
    log_file = os.path.join(logs_dir, "statdp{}_log.log".format(postfix))
    data_file = os.path.join(logs_dir, "statdp{}_data.log".format(postfix))

    # start from a clean slate: drop log and data files from previous runs
    if os.path.exists(log_file):
        log.warning("removing existing log file '%s'", log_file)
        os.remove(log_file)
    if os.path.exists(data_file):
        log.warning("removing existing data file '%s'", data_file)
        os.remove(data_file)

    log.configure("INFO", log_file=log_file, data_file=data_file, file_level="INFO")

    with initialize_parallel_executor(n_processes, out_dir):
        mechanisms = [
            "LaplaceMechanism", "TruncatedGeometricMechanism",
            "NoisyHist1", "NoisyHist2",
            "ReportNoisyMax1", "ReportNoisyMax2", "ReportNoisyMax3", "ReportNoisyMax4",
            "SparseVectorTechnique1", "SparseVectorTechnique2", "SparseVectorTechnique3",
            "SparseVectorTechnique4", "SparseVectorTechnique5", "SparseVectorTechnique6",
            "Rappor", "OneTimeRappor",
            "LaplaceParallel", "SVT34Parallel",
            "PrefixSum", "NumericalSVT"
        ]
        if only_mechanism is not None:
            mechanisms = [only_mechanism]

        for alg_name in mechanisms:
            # look up the mechanism and its statdp-specific configuration
            mechanism = statdp_mechanism_map[alg_name]
            kwargs = statdp_arguments_map[alg_name]
            pp_config = statdp_postprocessing_map[alg_name]
            num_inputs = statdp_num_inputs_map[alg_name]
            sensitivity = statdp_sensitivity_map[alg_name]
            run_statdp(alg_name, mechanism, pp_config, num_inputs, sensitivity, kwargs)

    log.info("finished experiments")
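# Example invocation (a sketch; the surrounding CLI wiring is an assumption):
# runs only the LaplaceMechanism experiments with 4 worker processes.
if __name__ == "__main__":
    run_with_postprocessing(n_processes=4, out_dir="statdp_out",
                            only_mechanism="LaplaceMechanism", postfix="_laplace")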
def __getstate__(self):
    # store torch model to a file (for pickling)
    self.state_dict_file = NamedTemporaryFile(
        dir=get_output_directory('training', 'models'),
        prefix='MultiLayerPerceptron_',
        suffix='.model',
        delete=False).name
    state_dict = self.model.state_dict()
    torch.save(state_dict, self.state_dict_file)

    # capture what is normally pickled
    state = self.__dict__.copy()

    # clear torch-specific objects
    state['model'] = None
    state['criterion'] = None
    return state
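# Counterpart sketch: __getstate__ above externalizes the torch weights to a file,
# so unpickling needs a __setstate__ that recreates the torch objects and reloads
# the saved state dict. This is an illustrative reconstruction under that protocol,
# not necessarily the repository's exact code.
def __setstate__(self, state):
    self.__dict__.update(state)
    self._init_torch()  # recreate self.model and self.criterion
    state_dict = torch.load(self.state_dict_file)
    self.model.load_state_dict(state_dict)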
@contextmanager
def initialize_dpsniper(config: DDConfig, out_dir: Optional[str] = None, torch_threads: Optional[int] = None):
    """
    Helper context manager to initialize DP-Sniper.

    Args:
        config: configuration
        out_dir: directory for temporary files, logs and torch models
            (a temporary directory is created if not specified)
        torch_threads: number of threads to be used by pytorch during training
            (the default torch configuration is used if not specified)
    """
    set_output_directory(out_dir)
    torch_initialize(torch_threads=torch_threads)
    log.configure("WARNING",
                  log_file=os.path.join(get_output_directory("logs"), "dpsniper.log"),
                  file_level="DEBUG")
    with initialize_parallel_executor(config.n_processes, out_dir=out_dir):
        yield
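# Usage sketch: initialize_dpsniper wraps setup and teardown (output directory,
# torch, logging, parallel executor), so search code runs inside the `with` block.
# The DDConfig keyword follows the test below; the concrete search wiring is elided.
with initialize_dpsniper(DDConfig(n_processes=2), out_dir="out"):
    ...  # construct the mechanism, attack optimizer and run the search here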
def run_all(self, n_processes: int, sequential=True):
    """
    Runs all experiments in the experiments list. Because this function initializes the
    global parallel executor, the executor must not already be initialized.

    Args:
        n_processes: Number of processes used for the global parallel executor.
        sequential: Whether the experiments should be run sequentially (True) or
            parallelized at the highest level using the global executor (False).
            If this flag is False, experiments _must not_ use the parallel executor
            themselves (risk of deadlock!).
    """
    set_output_directory(self.output_dir)
    logs_dir = get_output_directory("logs")

    # remove log files left over from previous runs of this series
    for fname in glob.glob(os.path.join(logs_dir, self.series_name + "_*.log")):
        log.warning("removing existing log file '%s'", fname)
        os.remove(fname)

    log_file = os.path.join(logs_dir, "{}_log.log".format(self.series_name))
    data_file = os.path.join(logs_dir, "{}_data.log".format(self.series_name))
    log.configure("ERROR", log_file=log_file, file_level=self.file_level, data_file=data_file)

    with initialize_parallel_executor(n_processes, self.output_dir):
        if sequential:
            for exp in self.experiments:
                ExperimentRunner._run_one_experiment((exp, self.series_comment))
        else:
            inputs = [(exp, self.series_comment) for exp in self.experiments]
            the_parallel_executor.execute(ExperimentRunner._run_one_experiment, inputs)

    log.info("finished experiments!")
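# Usage sketch: the ExperimentRunner constructor signature below is an assumption
# inferred from the attributes used above (output_dir, series_name, experiments,
# series_comment, file_level); `my_experiment` is a hypothetical experiment object.
runner = ExperimentRunner(output_dir="out", series_name="my_series")
runner.experiments.append(my_experiment)
runner.run_all(n_processes=4, sequential=True)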
def __init__(self,
             in_dimensions: int,
             optimizer_factory: TorchOptimizerFactory,
             feature_transform: Optional[FeatureTransformer] = None,
             normalize_input: bool = True,
             n_test_batches: int = 0,
             hidden_sizes: Tuple = (10, 5),
             epochs: int = 10,
             regularization_weight: float = 0.001,
             label: Optional[str] = None):
    """
    Creates a feedforward neural network classifier.

    Args:
        in_dimensions: number of input dimensions for the classifier (dimensionality of features)
        optimizer_factory: a factory constructing the optimizer to be used for training
        feature_transform: an optional feature transformer to be applied to the input features
        normalize_input: whether to normalize the input features (after feature transformation)
        n_test_batches: number of batches reserved for the test set (a non-zero value allows
            tracking test accuracy during training)
        hidden_sizes: a tuple (x_1, ..., x_n) of integers defining the number x_i of hidden
            neurons in the i-th hidden layer
        epochs: number of epochs for training
        regularization_weight: regularization coefficient in [0, 1]
        label: an optional string label for the classifier (used for tensorboard log file names)
    """
    super().__init__(feature_transform=feature_transform, normalize_input=normalize_input)
    self.in_dimensions = in_dimensions
    self.optimizer_factory = optimizer_factory
    self.n_test_batches = n_test_batches
    self.hidden_sizes = hidden_sizes
    self.epochs = epochs
    self.label = label
    self.regularization_weight = regularization_weight
    self.state_dict_file = None  # representation of model for pickling
    self.tensorboard_dir = get_output_directory('training', 'tensorboard')

    # initialize torch-specific fields
    self.model = None
    self.criterion = None
    self._init_torch()
def test_run(self):
    with initialize_parallel_executor(2, get_output_directory()):
        config = DDConfig(n_train=10, n=10, n_check=10, n_final=10, n_processes=2)
        mechanism = ConstantMechanism()
        optimizer_factory = SGDOptimizerFactory()
        classifier_factory = LogisticRegressionFactory(in_dimensions=1,
                                                       optimizer_factory=optimizer_factory)
        attack_optimizer = DPSniper(mechanism, classifier_factory, config)
        input_generator = SingleInputPairGenerator()
        optimizer = DDSearch(mechanism, attack_optimizer, input_generator, config)

        best = optimizer.run()

        self.assertEqual(best.a1, 20)
        self.assertEqual(best.a2, 20)
        self.assertEqual(best.eps, 0)