def apply_optimizers(
        self,
        train_net,
        train_init_net,
        grad_map,
        blob_to_device=None,
    ):
        CPU = muji.OnCPU()
        # if given, blob_to_device is a map from blob to device_option
        blob_to_device = blob_to_device or {}
        for param, optimizer in viewitems(self.param_to_optim):
            assert optimizer is not None, \
                "default optimizer must have been set in add_layer"
            # note that not all params have gradients, so we send None when
            # the gradient does not exist
            device = get_param_device(
                param,
                grad_map.get(str(param)),
                param_to_device=blob_to_device,
                default_device=CPU,
            )
            if device is not None:
                # extra info is not applicable for optimizers
                del device.extra_info[:]

            with core.DeviceScope(device):
                optimizer(
                    train_net, train_init_net, param, grad_map.get(str(param)))
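For reference, `get_param_device` can also be exercised on its own outside the layer model. A minimal sketch, assuming the helper from `caffe2.python.optimizer` that the later examples import; the blob names and the CUDA device id are made up for illustration:

from caffe2.proto import caffe2_pb2
from caffe2.python import core
from caffe2.python.optimizer import get_param_device

# Hypothetical device map: pin one parameter blob to GPU 0; anything
# not listed falls back to default_device (CPU here).
param_to_device = {"fc_w": core.DeviceOption(caffe2_pb2.CUDA, 0)}

device = get_param_device(
    "fc_w",          # parameter blob name
    "fc_w_grad",     # its gradient blob (may also be a core.GradientSlice)
    param_to_device=param_to_device,
    default_device=core.DeviceOption(caffe2_pb2.CPU),
)
print(device)  # expected to be the CUDA option from the map above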
def apply_regularizers_after_optimizer(
    self,
    train_net,
    train_init_net,
    grad_map,
    blob_to_device=None,
):
    logger.info("apply regularizer after optimizer")
    CPU = muji.OnCPU()
    # if given, blob_to_device is a map from blob to device_option
    blob_to_device = blob_to_device or {}
    for param, regularizer in viewitems(self.param_to_reg):
        if regularizer is None:
            continue
        assert isinstance(regularizer, Regularizer)
        logger.info("add regularizer {0} for param {1} to optimizer".format(regularizer, param))
        device = get_param_device(
            param,
            grad_map.get(str(param)),
            param_to_device=blob_to_device,
            default_device=CPU,
        )
        with core.DeviceScope(device):
            regularizer(
                train_net, train_init_net, param, grad=grad_map.get(str(param)),
                by=RegularizationBy.AFTER_OPTIMIZER
            )
Example #3
def apply_regularizers_after_optimizer(
    self,
    train_net,
    train_init_net,
    grad_map,
    blob_to_device=None,
):
    CPU = muji.OnCPU()
    # if given, blob_to_device is a map from blob to device_option
    blob_to_device = blob_to_device or {}
    for param, regularizer in viewitems(self.param_to_reg):
        if regularizer is None or not regularizer.apply_after_optimizer:
            continue
        assert isinstance(regularizer, Regularizer)
        device = get_param_device(
            param,
            grad_map.get(str(param)),
            param_to_device=blob_to_device,
            default_device=CPU,
        )
        with core.DeviceScope(device):
            regularizer(
                train_net, train_init_net, param, grad_map.get(str(param)))
Example #4
def apply_optimizers(
    self,
    train_net,
    train_init_net,
    grad_map,
    blob_to_device=None,
):
    CPU = muji.OnCPU()
    # if given, blob_to_device is a map from blob to device_option
    blob_to_device = blob_to_device or {}
    for param, optimizer in viewitems(self.param_to_optim):
        assert optimizer is not None, \
            "default optimizer must have been set in add_layer"
        # note that not all params have gradients, so we send None when
        # the gradient does not exist
        device = get_param_device(
            param,
            grad_map.get(str(param)),
            param_to_device=blob_to_device,
            default_device=CPU,
        )
        with core.DeviceScope(device):
            optimizer(
                train_net, train_init_net, param, grad_map.get(str(param)))
Example #5
def apply_optimizers(
    self,
    train_net,
    train_init_net,
    grad_map,
    blob_to_device=None,
):
    CPU = core.DeviceOption(caffe2_pb2.CPU)
    # if given, blob_to_device is a map from blob to device_option
    blob_to_device = blob_to_device or {}
    for param, optimizer in self.param_to_optim.items():
        assert optimizer is not None, \
            "default optimizer must have been set in add_layer"
        # note that not all params have gradients, so we send None when
        # the gradient does not exist
        device = get_param_device(
            param,
            grad_map.get(str(param)),
            param_to_device=blob_to_device,
            default_device=CPU,
        )
        with core.DeviceScope(device):
            optimizer(train_net, train_init_net, param,
                      grad_map.get(str(param)))
Example #6
def infer_blob_device(blob_name):
    return optimizer.get_param_device(blob_name,
                                      "{}_grad".format(blob_name),
                                      param_to_device)
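This wrapper relies on the caffe2 convention that the gradient of blob `X` is named `X_grad`. A small sketch of how such a closure might be set up and used; the blob names and the surrounding `param_to_device` map are placeholders, and the fallback to the gradient's entry is an assumption about `get_param_device`'s lookup order:

from caffe2.proto import caffe2_pb2
from caffe2.python import core, optimizer

# Hypothetical map: only the gradient blob is pinned, so resolving the
# parameter has to go through its "<blob>_grad" name.
param_to_device = {"emb_w_grad": core.DeviceOption(caffe2_pb2.CUDA, 1)}

def infer_blob_device(blob_name):
    return optimizer.get_param_device(
        blob_name, "{}_grad".format(blob_name), param_to_device
    )

device = infer_blob_device("emb_w")  # resolved via the "emb_w_grad" entry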
Example #7
    def modify_net(self,
                   net,
                   init_net=None,
                   grad_map=None,
                   blob_to_device=None,
                   modify_output_record=False):

        assert grad_map is not None

        CPU = core.DeviceOption(caffe2_pb2.CPU)

        final_param_map = {}
        if self.blobs_to_include is None:
            final_param_map = grad_map
        else:
            for blob in self.blobs_to_include:
                param = core.BlobReference(blob)
                if not net.BlobIsDefined(param):
                    raise Exception(
                        'param {0} is not defined in net {1}'.format(
                            param, net.Name()))
                final_param_map[param] = grad_map[param]

        if self.blobs_to_exclude is not None:
            for blob in self.blobs_to_exclude:
                final_param_map.pop(blob, None)

        for param, grad in final_param_map.items():
            # currently sparse gradients won't be clipped
            # further implementation is needed to enable it
            if isinstance(grad, core.GradientSlice):
                continue

            device = get_param_device(
                param,
                grad_map[str(param)],
                param_to_device=blob_to_device,
                default_device=CPU,
            )

            with core.DeviceScope(device):
                if self.grad_clip_method == self.BY_NORM:
                    if self.clip_norm_type == self.L2_NORM:
                        p = 2
                    elif self.clip_norm_type == self.L1_NORM:
                        p = 1

                    grad_norm = net.LpNorm(
                        [grad],
                        net.NextScopedBlob(prefix=str(grad) +
                                           '_l{}_norm'.format(p)),
                        p=p,
                    )

                    if p == 2:
                        grad_norm = net.Pow([grad_norm], exponent=0.5)

                    op_inputs = [grad, grad_norm]

                    if self.use_parameter_norm:
                        param_norm = net.LpNorm(
                            [param],
                            net.NextScopedBlob(prefix=str(param) +
                                               '_l{}_norm'.format(p)),
                            p=p,
                        )

                        if p == 2:
                            param_norm = net.Pow([param_norm], exponent=0.5)

                        op_inputs.append(param_norm)

                        if self.compute_norm_ratio:
                            net.Div([grad_norm, param_norm], [
                                net.NextScopedBlob(prefix=str(param) +
                                                   "_norm_ratio")
                            ])

                    net.ClipTensorByScaling(
                        op_inputs,
                        [grad],
                        threshold=self.clip_threshold,
                    )
                elif self.grad_clip_method == self.BY_VALUE:
                    net.Clip(
                        [grad],
                        [grad],
                        max=self.clip_max,
                        min=self.clip_min,
                    )
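For intuition, the BY_NORM branch above expresses the usual clip-by-norm rule in graph form: build ||g||_p (the extra Pow with exponent=0.5 for p=2 suggests LpNorm here returns the sum of |x|^p rather than the rooted norm) and rescale the gradient only when that norm exceeds clip_threshold. A plain-numpy restatement of the same arithmetic, assuming ClipTensorByScaling leaves the tensor untouched when the norm is at or below the threshold:

import numpy as np

def clip_by_norm(grad, threshold, p=2):
    # same arithmetic the caffe2 ops above build in graph form
    norm = np.linalg.norm(grad.ravel(), ord=p)
    if norm > threshold:
        grad = grad * (threshold / norm)
    return grad

g = np.array([3.0, 4.0])       # ||g||_2 = 5
print(clip_by_norm(g, 1.0))    # scaled down to the threshold -> [0.6 0.8]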
    def modify_net(self,
                   net,
                   init_net=None,
                   grad_map=None,
                   blob_to_device=None,
                   modify_output_record=False):

        assert grad_map is not None

        CPU = core.DeviceOption(caffe2_pb2.CPU)

        for param, grad in grad_map.items():

            # currently sparse gradients won't be clipped
            # further implementation is needed to enable it
            if isinstance(grad, core.GradientSlice):
                continue

            device = get_param_device(
                param,
                grad_map[str(param)],
                param_to_device=blob_to_device,
                default_device=CPU,
            )

            with core.DeviceScope(device):
                if self.grad_clip_method == self.BY_NORM:
                    if self.clip_norm_type == self.L2_NORM:
                        p = 2
                    elif self.clip_norm_type == self.L1_NORM:
                        p = 1

                    grad_norm = net.LpNorm(
                        [grad],
                        net.NextScopedBlob(prefix=str(grad) +
                                           '_l{}_norm'.format(p)),
                        p=p,
                    )

                    if p == 2:
                        grad_norm = net.Pow([grad_norm], exponent=0.5)

                    op_inputs = [grad, grad_norm]

                    if self.use_parameter_norm:
                        param_norm = net.LpNorm(
                            [param],
                            net.NextScopedBlob(prefix=str(param) +
                                               '_l{}_norm'.format(p)),
                            p=p,
                        )

                        if p == 2:
                            param_norm = net.Pow([param_norm], exponent=0.5)

                        op_inputs.append(param_norm)

                        if self.compute_norm_ratio:
                            net.Div([grad_norm, param_norm], [
                                net.NextScopedBlob(prefix=str(param) +
                                                   '_norm_ratio')
                            ])

                    net.ClipTensorByScaling(
                        op_inputs,
                        [grad],
                        threshold=self.clip_threshold,
                    )
Example #9
def infer_blob_device(blob_name):
    return optimizer.get_param_device(
        blob_name, "{}_grad".format(blob_name), param_to_device
    )
Example #10
    def modify_net(self, net, init_net=None, grad_map=None, blob_to_device=None):

        assert grad_map is not None

        CPU = core.DeviceOption(caffe2_pb2.CPU)

        for param, grad in grad_map.items():

            # currently sparse gradients won't be clipped
            # further implementation is needed to enable it
            if isinstance(grad, core.GradientSlice):
                continue

            device = get_param_device(
                param,
                grad_map[str(param)],
                param_to_device=blob_to_device,
                default_device=CPU,
            )

            with core.DeviceScope(device):
                if self.grad_clip_method == self.BY_NORM:
                    if self.clip_norm_type == self.L2_NORM:
                        p = 2
                    elif self.clip_norm_type == self.L1_NORM:
                        p = 1

                    grad_norm = net.LpNorm(
                        [grad],
                        net.NextScopedBlob(prefix=str(grad) + '_l{}_norm'.format(p)),
                        p=p,
                    )

                    if p == 2:
                        grad_norm = net.Pow([grad_norm], exponent=0.5)

                    op_inputs = [grad, grad_norm]

                    if self.use_parameter_norm:
                        param_norm = net.LpNorm(
                            [param],
                            net.NextScopedBlob(
                                prefix=str(param) + '_l{}_norm'.format(p)),
                            p=p,
                        )

                        if p == 2:
                            param_norm = net.Pow([param_norm], exponent=0.5)

                        op_inputs.append(param_norm)

                        if self.compute_norm_ratio:
                            net.Div(
                                [grad_norm, param_norm],
                                [net.NextScopedBlob(
                                    prefix=str(param) + '_norm_ratio')]
                            )

                    net.ClipTensorByScaling(
                        op_inputs,
                        [grad],
                        threshold=self.clip_threshold,
                    )
def _build_l1_bn(
    model,
    optimizer,
    weights_only=False,
    use_param_info_optim=True,
    max_gradient_norm=None,
    allow_lr_injection=False,
):
    param_to_device = _get_param_to_device(model)

    # Validate there are no duplicate params
    model.Validate()

    params = []
    for param_info in model.GetOptimizationParamInfo():
        if weights_only and param_info.blob not in model.weights:
            continue
        # add L1 norm for spatial bn
        if param_info.name.endswith('bn_s'):
            params.append(param_info)

    lr_multiplier = None
    if max_gradient_norm is not None:
        lr_multiplier = _calc_norm_ratio(
            model,
            params,
            'norm_clipped_grad_update',
            param_to_device,
            max_gradient_norm,
        )

    if allow_lr_injection:
        if not model.net.BlobIsDefined(_LEARNING_RATE_INJECTION):
            lr_injection = model.param_init_net.ConstantFill(
                [],
                _LEARNING_RATE_INJECTION,
                shape=[1],
                value=1.0,
            )
        else:
            lr_injection = _LEARNING_RATE_INJECTION

        if lr_multiplier is None:
            lr_multiplier = lr_injection
        else:
            lr_multiplier = model.net.Mul(
                [lr_multiplier, lr_injection],
                'lr_multiplier',
                broadcast=1,
            )
    if lr_multiplier is not None:
        optimizer.add_lr_multiplier(lr_multiplier)

    for param_info in params:
        param_name = str(param_info.blob)

        device = get_param_device(param_name, param_info.grad, param_to_device)

        with core.DeviceScope(device):
            if param_info.optimizer and use_param_info_optim:
                param_info.optimizer(model.net, model.param_init_net,
                                     param_info)
            else:
                optimizer(model.net, model.param_init_net, param_info)
    return optimizer
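A small, self-contained illustration of the filtering step above; the parameter names are made up, and the point is only that the `endswith('bn_s')` test selects the scale blobs that SpatialBN-style layers conventionally produce. The builder itself would then be invoked as `_build_l1_bn(model, some_optimizer, ...)` once gradients have been added to the model:

# Hypothetical parameter blob names; only the SpatialBN scale blobs
# (the ones matched by endswith('bn_s') in the builder) survive the filter.
param_names = ["conv1_w", "conv1_bn_s", "conv1_bn_b", "fc1_w", "res2_0_bn_s"]
bn_scale_params = [name for name in param_names if name.endswith("bn_s")]
print(bn_scale_params)  # ['conv1_bn_s', 'res2_0_bn_s']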