Example #1
    def test_load(self):
        mul_out, b1_out, b2_out, mean_out = self.net()
        sgd_optimizer = optimizer.SGD(learning_rate=1.0)
        recompute_optimizer = optimizer.RecomputeOptimizer(sgd_optimizer)
        recompute_optimizer._set_checkpoints([b1_out])
        # Loading a state dict is not implemented for RecomputeOptimizer yet,
        # so load() is expected to raise NotImplementedError.
        try:
            state_dict = {}
            recompute_optimizer.load(state_dict)
        except NotImplementedError as e:
            self.assertEqual(
                "load function is not supported by Recompute Optimizer for now",
                cpt.get_exception_message(e))
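
These tests build their forward graph through a self.net() helper that is not
part of this listing. Below is a minimal reconstruction, not the original
helper: it is inferred from the op sequence the tests assert (mul,
elementwise_add, elementwise_add, mean) and uses the same
block.create_parameter / block.create_var / block.append_op calls as Example
#5. All names and shapes are assumptions, and the with_dropout branch (which
would insert a dropout op right after mul, as Example #3 asserts) is left out.

    def net(self, with_dropout=False):
        # Sketch only: the dropout branch used by test_dropout is omitted.
        program = framework.Program()
        block = program.global_block()
        mul_x = block.create_parameter(dtype="float32", shape=[5, 10],
                                       lod_level=0, name="mul.x")
        mul_y = block.create_var(dtype="float32", shape=[10, 8],
                                 lod_level=0, name="mul.y")
        mul_out = block.create_var(dtype="float32", shape=[5, 8],
                                   lod_level=0, name="mul.out")
        b1 = block.create_parameter(dtype="float32", shape=[5, 8],
                                    lod_level=0, name="b1")
        b1_out = block.create_var(dtype="float32", shape=[5, 8],
                                  lod_level=0, name="b1.out")
        b2 = block.create_parameter(dtype="float32", shape=[5, 8],
                                    lod_level=0, name="b2")
        b2_out = block.create_var(dtype="float32", shape=[5, 8],
                                  lod_level=0, name="b2.out")
        mean_out = block.create_var(dtype="float32", shape=[1],
                                    lod_level=0, name="mean.out")
        block.append_op(type="mul",
                        inputs={"X": mul_x, "Y": mul_y},
                        outputs={"Out": mul_out},
                        attrs={"x_num_col_dims": 1})
        block.append_op(type="elementwise_add",
                        inputs={"X": mul_out, "Y": b1},
                        outputs={"Out": b1_out})
        block.append_op(type="elementwise_add",
                        inputs={"X": b1_out, "Y": b2},
                        outputs={"Out": b2_out})
        block.append_op(type="mean",
                        inputs={"X": b2_out},
                        outputs={"Out": mean_out})
        return mul_out, b1_out, b2_out, mean_out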
Example #2
    def test_adjacent_checkpoint(self):
        mul_out, b1_out, b2_out, mean_out = self.net()
        self.assertEqual(len(mean_out.block.ops), 4)
        self.assertEqual([op.type for op in mean_out.block.ops],
                         ["mul", "elementwise_add", "elementwise_add", "mean"])
        sgd_optimizer = optimizer.SGD(learning_rate=1.0)
        recompute_optimizer = optimizer.RecomputeOptimizer(sgd_optimizer)
        recompute_optimizer._set_checkpoints([mul_out, b1_out])
        opts, params_grads = recompute_optimizer.minimize(mean_out)

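        # With two adjacent checkpoints there is no segment to recompute, so
        # minimize() only appends the backward ops and one sgd update per
        # parameter; no forward op re-appears in the expected list below.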
        self.assertEqual(len(mean_out.block.ops), 12)
        self.assertEqual([op.type for op in mean_out.block.ops], [
            "mul", "elementwise_add", "elementwise_add", "mean",
            "fill_constant", "mean_grad", "elementwise_add_grad",
            "elementwise_add_grad", "mul_grad", "sgd", "sgd", "sgd"
        ])
Example #3
    def test_dropout(self):
        """
        If there are dropout layers in the forward nets, we should add a
        seed op
        """
        mul_out, b1_out, b2_out, mean_out = self.net(with_dropout=True)
        self.assertEqual(len(mean_out.block.ops), 5)
        self.assertEqual(
            [op.type for op in mean_out.block.ops],
            ["mul", "dropout", "elementwise_add", "elementwise_add", "mean"])
        sgd_optimizer = optimizer.SGD(learning_rate=1.0)
        recompute_optimizer = optimizer.RecomputeOptimizer(sgd_optimizer)
        recompute_optimizer._set_checkpoints([b1_out])
        opts, params_grads = recompute_optimizer.minimize(mean_out)

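        # The checkpoint at b1_out forces the mul + dropout segment to be
        # recomputed during backward (the second "mul" and "dropout" below),
        # and the inserted "seed" op lets the recomputed dropout reproduce
        # the forward pass's mask.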
        self.assertEqual(len(mean_out.block.ops), 17)
        self.assertEqual([op.type for op in mean_out.block.ops], [
            "mul", "seed", "dropout", "elementwise_add", "elementwise_add",
            "mean", "fill_constant", "mean_grad", "elementwise_add_grad",
            "mul", "dropout", "elementwise_add_grad", "dropout_grad",
            "mul_grad", "sgd", "sgd", "sgd"
        ])
Example #4
    def test_apply_gradients(self):
        mul_out, b1_out, b2_out, mean_out = self.net()
        sgd_optimizer = optimizer.SGD(learning_rate=1.0)
        recompute_optimizer = optimizer.RecomputeOptimizer(sgd_optimizer)
        recompute_optimizer._set_checkpoints([b1_out])
        # apply backward
        params_grads = recompute_optimizer.backward(mean_out,
                                                    startup_program=None,
                                                    parameter_list=None,
                                                    no_grad_set=None)

        # apply gradient
        program = mean_out.block.program
        with framework.program_guard(program, None):
            optimize_ops = recompute_optimizer.apply_gradients(params_grads)

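        # backward() already appended the grad ops, including the "mul" that
        # is recomputed for the checkpoint at b1_out; apply_gradients() adds
        # one sgd update per parameter.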
        self.assertEqual(len(mean_out.block.ops), 13)
        self.assertEqual([op.type for op in mean_out.block.ops], [
            "mul", "elementwise_add", "elementwise_add", "mean",
            "fill_constant", "mean_grad", "elementwise_add_grad", "mul",
            "elementwise_add_grad", "mul_grad", "sgd", "sgd", "sgd"
        ])
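
The tests exercise RecomputeOptimizer through raw blocks and ops. As a point
of reference, here is a minimal end-to-end sketch (not taken from the test
file) of how the same optimizer is typically driven from user code, assuming
the paddle.fluid 1.x static-graph API that these tests target:

    import paddle.fluid as fluid

    main_prog, startup_prog = fluid.Program(), fluid.Program()
    with fluid.program_guard(main_prog, startup_prog):
        x = fluid.data(name="x", shape=[None, 10], dtype="float32")
        hidden = fluid.layers.fc(input=x, size=8)
        loss = fluid.layers.mean(hidden)

        sgd = fluid.optimizer.SGD(learning_rate=1.0)
        recompute = fluid.optimizer.RecomputeOptimizer(sgd)
        # Variables marked as checkpoints are kept in memory; the segments
        # between checkpoints are recomputed during the backward pass.
        recompute._set_checkpoints([hidden])
        recompute.minimize(loss)

    # Inspect the ops appended by minimize(), as the assertions above do.
    print([op.type for op in main_prog.global_block().ops])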
Example #5
    def check_dgc_momentum_optimizer(self,
                                     dims=[5, 10, 8],
                                     name="momentum",
                                     regularization=None,
                                     use_recompute=False):
        init_program = framework.Program()
        program = framework.Program()
        block = program.global_block()
        # The parameter gets its own L2 regularizer (coeff 2e-4) only when no
        # global regularization is passed to the optimizer.
        mul_x = block.create_parameter(
            dtype="float32",
            shape=[dims[0], dims[1]],
            lod_level=0,
            name="mul.x",
            optimize_attr={'learning_rate': 1.1},
            regularizer=None if regularization is not None else
            regularizer.L2DecayRegularizer(2e-4))
        mul_y = block.create_var(dtype="float32",
                                 shape=[dims[1], dims[2]],
                                 lod_level=0,
                                 name="mul.y")
        mul_out = block.create_var(dtype="float32",
                                   shape=[dims[0], dims[2]],
                                   lod_level=0,
                                   name="mul.out")
        block.append_op(type="mul",
                        inputs={
                            "X": mul_x,
                            "Y": mul_y
                        },
                        outputs={"Out": mul_out},
                        attrs={"x_num_col_dims": 1})
        learning_rate = 0.01

        dgc_momentum_optimizer = self.MockDGCMomentum(
            learning_rate=learning_rate,
            momentum=0.2,
            rampup_begin_step=0,
            num_trainers=2,
            regularization=regularization,
            grad_clip=clip.GradientClipByNorm(1.0))

        if use_recompute:
            dgc_momentum_optimizer = optimizer.RecomputeOptimizer(
                dgc_momentum_optimizer)
            dgc_momentum_optimizer._set_checkpoints([])
            # Re-expose the inner optimizer's test helpers on the wrapper.
            dgc_momentum_optimizer.get_accumulators = \
                dgc_momentum_optimizer._optimizer.get_accumulators
            dgc_momentum_optimizer.get_velocity_str = \
                dgc_momentum_optimizer._optimizer.get_velocity_str

        mean_out = block.create_var(dtype="float32",
                                    shape=[1],
                                    lod_level=0,
                                    name="mean.out")
        block.append_op(type="mean",
                        inputs={"X": mul_out},
                        outputs={"Out": mean_out})
        # params_grads = append_backward(mean_out)
        params_grads = dgc_momentum_optimizer.backward(mean_out)
        accumulator_count = 1 if name == "momentum" else 2
        self.assertEqual(len(params_grads), 1)
        self.assertEqual(len(dgc_momentum_optimizer.get_accumulators()),
                         accumulator_count)
        with framework.program_guard(program, init_program):
            opts = dgc_momentum_optimizer.apply_gradients(params_grads)
        self.assertEqual(len(opts), 2)
        update_op = opts[-1]
        self.assertEqual([op.type for op in opts], ["scale", name])
        self.assertFalse(update_op.attr('use_nesterov'))

        # Check accumulators
        accumulators = dgc_momentum_optimizer.get_accumulators()
        self.assertEqual(len(accumulators), accumulator_count)
        self.assertTrue(
            dgc_momentum_optimizer.get_velocity_str() in accumulators)
        velocity_acc = accumulators[dgc_momentum_optimizer.get_velocity_str()]
        self.assertEqual(len(velocity_acc), 1)
        self.assertTrue(mul_x.name in velocity_acc)

        # Check init_program
        init_ops = init_program.global_block().ops
        self.assertEqual(len(init_ops), 1)
        self.assertEqual(init_ops[0].type, "fill_constant")
        self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)

        # Check dgc op regularization coeff: 2e-4 comes from the parameter's
        # own L2 regularizer; otherwise the caller is expected to pass a
        # global regularization with coeff 1e-4.
        train_ops = program.global_block().ops
        for op in train_ops:
            if op.type == "dgc":
                coeff = 2e-4 if regularization is None else 1e-4
                self.assertAlmostEqual(op.attr('regular_coeff'), coeff)
                print("dgc regular_coeff=" + str(coeff))