def test_initialization(self):
     params = Parameters()
     workers = [Worker(0, params)]
     zero_tensor = torch.zeros(params.n_dimensions, dtype=torch.float64)
     update = ArtemisUpdate(params, workers)
     self.assertTrue(torch.equal(update.g, zero_tensor))
     self.assertTrue(torch.equal(update.h, zero_tensor))
     self.assertTrue(torch.equal(update.v, zero_tensor))
     self.assertTrue(torch.equal(update.l, zero_tensor))
     self.assertTrue(torch.equal(update.value_to_compress, zero_tensor))
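
These tests rely on module-level fixtures (DIM, zero_tensor, ones_tensor, w, x, y) that are not shown in this listing. A minimal sketch of what they might look like, assuming a small synthetic least-squares setup; every value and helper below is an assumption, only the names are taken from the snippets:

import torch

# Hypothetical fixtures assumed by the snippets below; the actual test module
# may construct them differently. The multi-device fixtures (X, Y, number_of_device)
# used later would be built the same way, once per device.
DIM = 10                                              # dimension passed as n_dimensions=DIM
nb_points = 20                                        # assumed number of synthetic samples
zero_tensor = torch.zeros(DIM, dtype=torch.float64)   # reference "nothing happened" tensor
ones_tensor = torch.ones(DIM, dtype=torch.float64)    # used to build an artificial memory
w = torch.rand(DIM, dtype=torch.float64)              # model point handed to update.compute()
x = torch.rand(nb_points, DIM, dtype=torch.float64)   # local features of the single worker
y = x @ torch.rand(DIM, dtype=torch.float64)          # local labels from a noiseless linear model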
Example #2
 def test_Diana(self):
     params = Diana().define(n_dimensions=DIM, nb_devices=1, quantization_param=10)
     params.up_learning_rate = 0.5
     workers = [Worker(0, params)]
     workers[0].set_data(x, y)
     workers[0].cost_model.L = workers[0].cost_model.local_L
     update = ArtemisUpdate(params, workers)
     new_model_param = update.compute(w, 2, 2)
     # Check that gradients have been updated.
     self.assertFalse(torch.equal(update.g, zero_tensor))
     self.assertFalse(torch.equal(update.v, zero_tensor))
     self.assertFalse(torch.equal(update.h, zero_tensor))
     self.assertTrue(torch.equal(update.H, zero_tensor))
     # Check that nothing has been quantized for the return value.
     # NB: there is a problem with this test; it only passes when run with Artemis settings.
     self.assertTrue(torch.equal(update.value_to_compress, zero_tensor))
Example #3
 def test_Artemis(self):
     params = Artemis().define(n_dimensions=DIM, nb_devices=1, quantization_param=10)
     params.up_learning_rate = 0.5
     workers = [Worker(0, params)]
     workers[0].set_data(x, y)
     workers[0].cost_model.L = workers[0].cost_model.local_L
     update = ArtemisUpdate(params, workers)
     update.compute(w, 2, 2)
     # Check that gradients have been updated.
     self.assertFalse(torch.equal(update.g, zero_tensor))
     self.assertFalse(torch.equal(update.v, zero_tensor))
     self.assertFalse(torch.equal(update.h, zero_tensor))
     # Check that the down memory H has not been updated.
     self.assertTrue(torch.equal(update.H, zero_tensor))
     # Check that the correct value has been compressed.
     self.assertTrue(torch.equal(update.value_to_compress, update.g))
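
The quantization_param=10 argument above presumably selects a stochastic s-level quantization operator for the compressed messages. A minimal sketch of such an operator, written from the standard definition rather than taken from this repository:

import torch

def s_quantize(v: torch.Tensor, s: int) -> torch.Tensor:
    """Stochastic s-level quantization (sketch): each coordinate is randomly rounded
    to one of s+1 levels of |v_i| / ||v||, so the result is unbiased in expectation."""
    norm = torch.norm(v)
    if s == 0 or norm == 0:
        return torch.zeros_like(v)
    ratio = s * torch.abs(v) / norm            # lies in [0, s]
    lower = torch.floor(ratio)
    # Round up with probability equal to the fractional part (keeps E[Q(v)] = v).
    level = lower + torch.bernoulli(ratio - lower)
    return torch.sign(v) * norm * level / s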
Example #4
 def test_QSGD(self):
     params = Qsgd().define(n_dimensions=DIM, nb_devices=1, quantization_param=10)
     workers = [Worker(0, params)]
     workers[0].set_data(x, y)
     workers[0].cost_model.L = workers[0].cost_model.local_L
     update = ArtemisUpdate(params, workers)
     update.compute(w, 2, 2)
     # Check that gradients have been updated.
     self.assertFalse(torch.equal(update.g, zero_tensor))
     self.assertFalse(torch.equal(update.v, zero_tensor))
     # Check that no memory has been updated.
     self.assertTrue(torch.equal(update.h, zero_tensor))
     self.assertTrue(torch.equal(update.H, zero_tensor))
     # Check that nothing has been quantized for the return value.
     self.assertTrue(torch.equal(update.value_to_compress, zero_tensor))
Example #5
 def setUpClass(cls):
     """ get_some_resource() is slow, to avoid calling it for each test use setUpClass()
         and store the result as class variable
     """
     super(TestRandomizedAlgo, cls).setUpClass()
     cls.cost_models = build_several_cost_model(RMSEModel, X, Y, number_of_device)
     cls.params = RandMCM().define(n_dimensions=dim,
                                   nb_devices=number_of_device,
                                   up_compression_model=SQuantization(1, dim),
                                   down_compression_model=SQuantization(1, dim),
                                   nb_epoch=1,
                                   cost_models=cls.cost_models,
                                   step_formula=constant_step_size)
     cls.params.down_learning_rate = 1 / cls.params.down_compression_model.omega_c
     cls.params.up_learning_rate = 1
     cls.workers = [Worker(i, cls.params, LocalArtemisUpdate) for i in range(number_of_device)]
 def test_doubleMODELcompression_without_memory(self):
     params = SGDDoubleModelCompressionWithoutMem().define(
         n_dimensions=DIM, nb_devices=1, quantization_param=10)
     params.learning_rate = 0.5
     workers = [Worker(0, params)]
     workers[0].set_data(x, y)
     workers[0].cost_model.L = workers[0].cost_model.local_L
     update = ArtemisUpdate(params, workers)
     new_w = update.compute(w, 2, 2)
     # Check that gradients have been updated.
     self.assertFalse(torch.equal(update.g, zero_tensor))
     self.assertFalse(torch.equal(update.v, zero_tensor))
     self.assertFalse(torch.equal(update.h, zero_tensor))
     # Check that the memory l has not been updated (no memory in this variant).
     self.assertTrue(torch.equal(update.l, zero_tensor))
     # Check that the correct value has been compressed.
     self.assertTrue(torch.equal(update.value_to_compress, new_w))
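
The last assertion shows what distinguishes this variant: without a down memory, the server compresses the new model itself. A minimal sketch of that downlink step, with hypothetical names and the s_quantize sketch above standing in for the repository's compression operator:

# Downlink of double-model compression WITHOUT memory (sketch): the broadcast
# message is C(new_w), which is why value_to_compress equals new_w in this test.
def downlink_without_memory(new_w, compress=s_quantize, s=10):
    return compress(new_w, s)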
Example #7
    def __init__(self, parameters: Parameters) -> None:
        """Initialization of the gradient descent.

        It initializes all the workers of the network, the sequence of (averaged)
        losses, and the sequence of (averaged) models.

        Args:
            parameters: the parameters of the descent.
        """
        super().__init__()
        self.parameters = parameters
        self.train_losses = []
        self.norm_error_feedback = []
        self.dist_to_model = [torch.tensor(0.)]
        self.var_models = [torch.tensor(0.)]
        self.model_params = []
        self.averaged_model_params = []
        self.averaged_train_losses = []
        self.memory_info = None

        if (self.parameters.use_up_memory
                and self.parameters.up_compression_model.omega_c != 0
                and self.parameters.up_learning_rate is None):
            self.parameters.up_learning_rate = 1 / (
                2 * (self.parameters.up_compression_model.omega_c + 1))
        elif (not self.parameters.use_up_memory
                or self.parameters.up_compression_model.omega_c == 0):
            self.parameters.up_learning_rate = 0
        if (self.parameters.use_down_memory
                and self.parameters.down_compression_model.omega_c != 0
                and self.parameters.down_learning_rate is None):
            self.parameters.down_learning_rate = 1 / (
                2 * (self.parameters.down_compression_model.omega_c + 1))
        elif (not self.parameters.use_down_memory
                or self.parameters.down_compression_model.omega_c == 0):
            self.parameters.down_learning_rate = 0

        if self.parameters.use_up_memory:
            self.parameters.error_feedback_coef = 1 / (
                self.parameters.up_compression_model.omega_c + 1)

        # Creating each worker of the network.
        self.workers = [
            Worker(i, parameters, self.__local_update__())
            for i in range(self.parameters.nb_devices)
        ]

        # Call for the update method of the gradient descent.
        self.update = self.__update_method__()
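
The defaults above follow the rules 1 / (2 * (omega_c + 1)) for the memory step sizes and 1 / (omega_c + 1) for the error-feedback coefficient. A small worked example; the omega_c value is illustrative only:

# Illustrative numbers: omega_c ~ 3.16 is roughly what 1-level quantization gives in
# dimension 10 (see the omega_c sketch at the end of this listing).
omega_c = 3.16
up_learning_rate = 1 / (2 * (omega_c + 1))     # ~= 0.120
error_feedback_coef = 1 / (omega_c + 1)        # ~= 0.240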
Example #8
 def test_doubleMODELcompression_WITH_memory(self):
     params = MCM().define(n_dimensions=DIM, nb_devices=1, quantization_param=10)
     params.up_learning_rate = 0.5
     workers = [Worker(0, params)]
     workers[0].set_data(x, y)
     workers[0].cost_model.L = workers[0].cost_model.local_L
     update = ArtemisUpdate(params, workers)
     artificial_l = ones_tensor.clone().detach()
     update.H = artificial_l.clone().detach()
     new_w = update.compute(w, 2, 2)
     # Check that gradients have been updated.
     self.assertFalse(torch.equal(update.g, zero_tensor))
     self.assertFalse(torch.equal(update.v, zero_tensor))
     self.assertFalse(torch.equal(update.h, zero_tensor))
     # Check that the down memory H has been updated.
     self.assertFalse(torch.equal(update.H, artificial_l))
     # Check that the correct value has been compressed.
     self.assertTrue(torch.equal(update.value_to_compress, new_w - artificial_l))
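
With a down memory, the broadcast value is the difference new_w - H rather than the model itself, and H then drifts towards the model. A minimal sketch of that step, again with hypothetical names and the s_quantize sketch from above:

# Downlink of double-model compression WITH memory (sketch): compress new_w - H,
# which is exactly what the last assertion of this test checks.
def downlink_with_memory(new_w, H, alpha_down, compress=s_quantize, s=10):
    value_to_compress = new_w - H
    delta = compress(value_to_compress, s)     # broadcast C(new_w - H)
    H = H + alpha_down * delta                 # move the memory towards the model
    return delta, H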
Example #9
 def test_doubleGRADIENTcompression_WITH_additional_memory(self):
     params = DoreVariant().define(n_dimensions=DIM, nb_devices=1, quantization_param=10)
     params.up_learning_rate = 0.5
     workers = [Worker(0, params)]
     workers[0].set_data(x, y)
     workers[0].cost_model.L = workers[0].cost_model.local_L
     update = ArtemisUpdate(params, workers)
     artificial_l = ones_tensor.clone().detach()
     # We artificially set a different memory to check that it has an impact on the update computation.
     update.H = artificial_l.clone().detach()
     update.compute(w, 2, 2)
     # Check that gradients have been updated.
     self.assertFalse(torch.equal(update.g, zero_tensor))
     self.assertFalse(torch.equal(update.v, zero_tensor))
     self.assertFalse(torch.equal(update.h, zero_tensor))
     # Check that the down memory H has been updated.
     self.assertFalse(torch.equal(update.H, artificial_l))
     # Check that the correct value has been compressed.
     self.assertTrue(torch.equal(update.value_to_compress, update.g - artificial_l))
    def __init__(self, parameters: Parameters) -> None:
        """Initialization of the gradient descent.

        It initializes all the workers of the network, the sequence of (averaged)
        losses, and the sequence of (averaged) models.

        Args:
            parameters: the parameters of the descent.
        """
        super().__init__()
        self.parameters = parameters
        self.losses = []
        self.model_params = []
        self.averaged_model_params = []
        self.averaged_losses = []
        self.X, self.Y = None, None

        if self.parameters.quantization_param != 0:
            self.parameters.omega_c = s_quantization_omega_c(
                self.parameters.n_dimensions,
                self.parameters.quantization_param)

            # If learning_rate is None, we set it to the optimal value.
            if self.parameters.learning_rate is None:
                self.parameters.learning_rate = 1 / (
                    2 * (self.parameters.omega_c + 1))
            elif not self.parameters.force_learning_rate:
                self.parameters.learning_rate *= 1 / (self.parameters.omega_c + 1)

        # If quantization_param == 0, there is no compression, which means that we don't
        # want to "predict" values from previous ones, and thus we set learning_rate to zero.
        else:
            self.parameters.learning_rate = 0

        # Creating each worker of the network.
        self.workers = [
            Worker(i, parameters, self.__local_update__())
            for i in range(self.parameters.nb_devices)
        ]
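
s_quantization_omega_c presumably returns the variance factor of stochastic s-level quantization; the standard bound is min(d / s^2, sqrt(d) / s). A sketch under that assumption, not the repository's actual implementation:

import math

def s_quantization_omega_c(n_dimensions: int, s: int) -> float:
    """Variance factor of stochastic s-level quantization (standard bound,
    assumed to match the helper of the same name used above)."""
    return min(n_dimensions / s ** 2, math.sqrt(n_dimensions) / s)

# With the defaults used in the tests (quantization_param=10),
# omega_c = min(DIM / 100, sqrt(DIM) / 10) and the default learning rate
# becomes 1 / (2 * (omega_c + 1)).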