Example #1
    def to_tmp_file(self) -> str:
        """
        Stores the result to a temporary file.

        Returns:
            The path of the created temporary file.
        """
        tmp_dir = get_output_directory("tmp")
        fd, filename = tempfile.mkstemp(dir=tmp_dir)
        log.debug("Storing result to file '%s'", filename)
        with os.fdopen(fd, "wb") as f:
            pickle.dump(self, f)
        return filename
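The path returned by to_tmp_file points to a plain pickle file, so it can be read back directly; a minimal sketch, where result stands for any object exposing this method:

path = result.to_tmp_file()
with open(path, "rb") as f:
    restored = pickle.load(f)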
Example #2
    def test_pickle(self):
        model = MultiLayerPerceptron(in_dimensions=2,
                                     optimizer_factory=LBFGSOptimizerFactory(),
                                     hidden_sizes=(50, 50))
        model.train(training_batch_generator())
        probs1 = model.predict_probabilities(x_test)
        # tempfile.mktemp is deprecated (race-prone); acceptable here only
        # because the file is created immediately below in a test directory
        filename = tempfile.mktemp(dir=get_output_directory("tmp"))
        with open(filename, "wb") as f:
            pickle.dump(model, f)
        with open(filename, "rb") as f:
            obj = pickle.load(f)
        os.remove(filename)
        assert isinstance(obj, MultiLayerPerceptron)
        probs2 = obj.predict_probabilities(x_test)
        np.testing.assert_array_equal(probs1, probs2)
Example #3
def run_with_postprocessing(n_processes: int,
                            out_dir: str,
                            only_mechanism: Optional[str] = None,
                            postfix: str = ""):
    log.configure("WARNING")
    set_output_directory(out_dir)
    logs_dir = get_output_directory("logs")
    log_file = os.path.join(logs_dir, "statdp{}_log.log".format(postfix))
    data_file = os.path.join(logs_dir, "statdp{}_data.log".format(postfix))

    if os.path.exists(log_file):
        log.warning("removing existing log file '%s'", log_file)
        os.remove(log_file)
    if os.path.exists(data_file):
        log.warning("removing existing log file '%s'", data_file)
        os.remove(data_file)

    log.configure("INFO",
                  log_file=log_file,
                  data_file=data_file,
                  file_level="INFO")

    with initialize_parallel_executor(n_processes, out_dir):
        mechanisms = [
            "LaplaceMechanism", "TruncatedGeometricMechanism", "NoisyHist1",
            "NoisyHist2", "ReportNoisyMax1", "ReportNoisyMax2",
            "ReportNoisyMax3", "ReportNoisyMax4", "SparseVectorTechnique1",
            "SparseVectorTechnique2", "SparseVectorTechnique3",
            "SparseVectorTechnique4", "SparseVectorTechnique5",
            "SparseVectorTechnique6", "Rappor", "OneTimeRappor",
            "LaplaceParallel", "SVT34Parallel", "PrefixSum", "NumericalSVT"
        ]

        if only_mechanism is not None:
            mechanisms = [only_mechanism]

        for alg_name in mechanisms:
            mechanism = statdp_mechanism_map[alg_name]
            kwargs = statdp_arguments_map[alg_name]
            pp_config = statdp_postprocessing_map[alg_name]
            num_inputs = statdp_num_inputs_map[alg_name]
            sensitivity = statdp_sensitivity_map[alg_name]
            run_statdp(alg_name, mechanism, pp_config, num_inputs, sensitivity,
                       kwargs)
    log.info("finished experiments")
Example #4
    def __getstate__(self):
        # store torch model to a file (for pickling)
        self.state_dict_file = NamedTemporaryFile(
            dir=get_output_directory('training', 'models'),
            prefix='MultiLayerPerceptron_',
            suffix='.model',
            delete=False).name
        state_dict = self.model.state_dict()
        torch.save(state_dict, self.state_dict_file)

        # capture what is normally pickled
        state = self.__dict__.copy()

        # clear torch-specific objects
        state['model'] = None
        state['criterion'] = None

        return state
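The natural counterpart __setstate__ is not part of this example; a minimal sketch, assuming the _init_torch helper shown in Example #7 rebuilds self.model and self.criterion:

    def __setstate__(self, state):
        # restore the attributes captured in __getstate__
        self.__dict__.update(state)
        # rebuild the torch-specific objects that were cleared before pickling
        self._init_torch()
        # reload the trained weights saved to the state-dict file
        self.model.load_state_dict(torch.load(self.state_dict_file))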
Example #5
@contextmanager
def initialize_dpsniper(config: DDConfig,
                        out_dir: Optional[str] = None,
                        torch_threads: Optional[int] = None):
    """
    Helper context manager to initialize DP-Sniper.

    Args:
        config: configuration
        out_dir: directory for temporary files, logs and torch models (a temporary directory is created if not specified)
        torch_threads: number of threads to be used by pytorch during training (the default torch configuration is used if not specified)
    """
    set_output_directory(out_dir)
    torch_initialize(torch_threads=torch_threads)
    log.configure("WARNING",
                  log_file=os.path.join(get_output_directory("logs"),
                                        "dpsniper.log"),
                  file_level="DEBUG")
    with initialize_parallel_executor(config.n_processes, out_dir=out_dir):
        yield
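A typical call site wraps the entire analysis in this context manager; a minimal sketch with placeholder configuration values:

config = DDConfig(n_processes=2)
with initialize_dpsniper(config, out_dir="my_output"):
    ...  # run the attack search here; logging and the parallel executor are set up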
Example #6
    def run_all(self, n_processes: int, sequential: bool = True):
        """
        Runs all experiments in the experiments list. Because this function initializes the global parallel
        executor, the executor must not already be initialized when this method is called.

        Args:
            n_processes: Number of processes used for the global parallel executor.
            sequential: Whether the experiments should be run sequentially (True) or parallelized
                    at the highest level using the global executor (False). If this flag is False, experiments
                    _must not_ use the parallel executor themselves (risk of deadlock!).
        """
        set_output_directory(self.output_dir)
        logs_dir = get_output_directory("logs")

        for fname in glob.glob(
                os.path.join(logs_dir, self.series_name + "_*.log")):
            log.warning("removing existing log file '%s'", fname)
            os.remove(fname)

        log_file = os.path.join(logs_dir,
                                "{}_log.log".format(self.series_name))
        data_file = os.path.join(logs_dir,
                                 "{}_data.log".format(self.series_name))
        log.configure("ERROR",
                      log_file=log_file,
                      file_level=self.file_level,
                      data_file=data_file)

        with initialize_parallel_executor(n_processes, self.output_dir):
            if sequential:
                for exp in self.experiments:
                    ExperimentRunner._run_one_experiment(
                        (exp, self.series_comment))
            else:
                inputs = []
                for exp in self.experiments:
                    inputs.append((exp, self.series_comment))
                the_parallel_executor.execute(
                    ExperimentRunner._run_one_experiment, inputs)

        log.info("finished experiments!")
Example #7
    def __init__(self,
                 in_dimensions: int,
                 optimizer_factory: TorchOptimizerFactory,
                 feature_transform: Optional[FeatureTransformer] = None,
                 normalize_input: bool = True,
                 n_test_batches: int = 0,
                 hidden_sizes: Tuple = (10, 5),
                 epochs: int = 10,
                 regularization_weight: float = 0.001,
                 label: Optional[str] = None):
        """
        Creates a feedforward neural network classifier.

        Args:
            in_dimensions: number of input dimensions for the classifier (dimensionality of features)
            optimizer_factory: a factory constructing the optimizer to be used for training
            feature_transform: an optional feature transformer to be applied to the input features
            normalize_input: whether to perform normalization of input features (after feature transformation)
            n_test_batches: number of batches reserved for the test set (a non-zero value allows tracking test accuracy during training)
            hidden_sizes: a tuple (x_1, ..., x_n) of integers defining the number x_i of hidden neurons in the i-th hidden layer
            epochs: number of epochs for training
            regularization_weight: regularization coefficient in [0, 1]
            label: an optional string label for the classifier (used for tensorboard log file names)
        """
        super().__init__(feature_transform=feature_transform,
                         normalize_input=normalize_input)
        self.in_dimensions = in_dimensions
        self.optimizer_factory = optimizer_factory
        self.n_test_batches = n_test_batches
        self.hidden_sizes = hidden_sizes
        self.epochs = epochs
        self.label = label
        self.regularization_weight = regularization_weight

        self.state_dict_file = None  # representation of model for pickling
        self.tensorboard_dir = get_output_directory('training', 'tensorboard')

        # initialize torch-specific fields
        self.model = None
        self.criterion = None
        self._init_torch()
Example #8
    def test_run(self):
        with initialize_parallel_executor(2, get_output_directory()):
            config = DDConfig(n_train=10,
                              n=10,
                              n_check=10,
                              n_final=10,
                              n_processes=2)
            mechanism = ConstantMechanism()

            optimizer_factory = SGDOptimizerFactory()
            classifier_factory = LogisticRegressionFactory(
                in_dimensions=1, optimizer_factory=optimizer_factory)
            attack_optimizer = DPSniper(mechanism, classifier_factory, config)

            input_generator = SingleInputPairGenerator()
            optimizer = DDSearch(mechanism, attack_optimizer, input_generator,
                                 config)

            best = optimizer.run()
            self.assertEqual(best.a1, 20)
            self.assertEqual(best.a2, 20)
            self.assertEqual(best.eps, 0)