Example #1
    def _create_accumulators(self, block, parameters):
        assert isinstance(block, framework.Block)

        main_block = block.program.global_block()
        # Create beta1 and beta2 power tensors
        beta_shape = [1]
        self._beta1_pow_acc = self.helper.create_global_variable(
            name=unique_name.generate('beta1_pow_acc'),
            dtype='float32',
            shape=beta_shape,
            lod_level=0,
            persistable=True)
        self.helper.set_variable_initializer(self._beta1_pow_acc,
                                             initializer=Constant(self._beta1))

        self._beta2_pow_acc = self.helper.create_global_variable(
            name=unique_name.generate('beta2_pow_acc'),
            dtype='float32',
            shape=beta_shape,
            lod_level=0,
            persistable=True)

        self.helper.set_variable_initializer(self._beta2_pow_acc,
                                             initializer=Constant(self._beta2))

        # Create accumulator tensors for first and second moments
        for p in parameters:
            self._add_accumulator(self._moment1_acc_str, p)
            self._add_accumulator(self._moment2_acc_str, p)
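A note on the calls above: unique_name.generate takes a key and hands back a name that has not been used before, which is what keeps the persistable beta-power accumulators from colliding if the optimizer is set up more than once. A minimal sketch of the expected behaviour, assuming the old paddle.fluid API where the generator appends an incrementing counter to the key:

import paddle.fluid as fluid

# Each call with the same key yields a fresh, suffixed name,
# e.g. 'beta1_pow_acc_0', then 'beta1_pow_acc_1', and so on.
print(fluid.unique_name.generate('beta1_pow_acc'))
print(fluid.unique_name.generate('beta1_pow_acc'))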
Example #2
 # Inner helper nested inside fluid's weight-norm setup; 'dtype' and 'self'
 # are captured from the enclosing scope rather than defined in this snippet.
 def __norm_op(x,
               out=None,
               p=2,
               dim=None,
               keep_dim=False,
               block=self.startup_program.global_block()):
     if out is None:
         out = block.create_var(
             name=unique_name.generate(".".join(
                 [self.name, 'weight_norm_norm'])),
             dtype=dtype,
             persistable=False)
     abs_out = block.create_var(
         name=unique_name.generate(".".join(
             [self.name, 'weight_norm_abs'])),
         dtype=dtype,
         persistable=False)
     block.append_op(
         type='abs', inputs={'X': x}, outputs={'Out': abs_out})
     pow_out = block.create_var(
         name=unique_name.generate(".".join(
             [self.name, 'weight_norm_pow'])),
         dtype=dtype,
         persistable=False)
     block.append_op(
         type='pow',
         inputs={'X': abs_out},
         outputs={'Out': pow_out},
         attrs={'factor': float(p)})
     sum_out = block.create_var(
         name=unique_name.generate(".".join(
             [self.name, 'weight_norm_sum'])),
         dtype=dtype,
         persistable=False)
     block.append_op(
         type='reduce_sum',
         inputs={'X': pow_out},
         outputs={'Out': sum_out},
         attrs={
             'dim': dim,
             'keep_dim': keep_dim,
             'reduce_all': True if dim is None else False
         })
     block.append_op(
         type='pow',
         inputs={'X': sum_out},
         outputs={'Out': out},
         attrs={'factor': 1. / p})
     return out
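For reference, __norm_op assembles the p-norm from primitive ops: abs, elementwise pow, reduce_sum over dim, then pow by 1/p. A rough NumPy equivalent of what the appended ops compute (an illustration only, not part of the fluid code):

import numpy as np

def p_norm(x, p=2, dim=None, keep_dim=False):
    # abs -> pow(p) -> reduce_sum over dim -> pow(1/p), mirroring the op chain above
    summed = np.sum(np.abs(x) ** p, axis=dim, keepdims=keep_dim)
    return summed ** (1.0 / p)

x = np.array([[3.0, 4.0], [6.0, 8.0]])
print(p_norm(x, p=2, dim=1))  # [ 5. 10.]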
Example #3
    def create_parameter(self,
                         attr,
                         shape,
                         dtype,
                         is_bias=False,
                         default_initializer=None):
        # Deepcopy the attr so that parameters can be shared in program
        attr = copy.deepcopy(attr)
        assert isinstance(attr, ParamAttr)
        suffix = 'b' if is_bias else 'w'
        if attr.name is None:
            attr.name = unique_name.generate(".".join([self.name, suffix]))

        if default_initializer is None and attr.initializer is None:
            if is_bias:
                attr.set_default_bias_initializer()
            else:
                attr.set_default_param_initializer()
        else:
            attr.set_default_initializer(default_initializer)

        # If weight normalization is set, insert extra parameters and ops.
        # Refer to https://arxiv.org/pdf/1602.07868.pdf
        if isinstance(attr, WeightNormParamAttr):
            param = self._create_weight_normalize(attr, shape, dtype)
            WeightNormParamAttr.params_with_weight_norm.append(param)
            return param

        self.startup_program.global_block().create_parameter(
            dtype=dtype, shape=shape, **attr.to_kwargs(with_initializer=True))
        return self.main_program.global_block().create_parameter(
            dtype=dtype, shape=shape, **attr.to_kwargs())
Example #4
 def __norm_except_dim(x,
                       out=None,
                       dim=None,
                       block=self.startup_program.global_block()):
     """Computes the norm over all dimensions except dim"""
     if out is None:
         out = block.create_var(
             name=unique_name.generate(".".join(
                 [self.name, 'weight_norm_norm'])),
             dtype=dtype,
             persistable=False)
     if dim is None:
         __norm_op(x, out, dim=dim, block=block)
     elif dim == 0:
         out_shape = [x.shape[0]] + [1] * (len(x.shape) - 1)
         reshape = __reshape_op(x, shape=[x.shape[0], -1], block=block)
         norm = __norm_op(reshape, dim=1, block=block)
         __reshape_op(norm, out=out, shape=out_shape, block=block)
     elif dim == len(x.shape) - 1:
         out_shape = [1] * (len(x.shape) - 1) + [x.shape[-1]]
         reshape = __reshape_op(x, shape=[-1, x.shape[-1]], block=block)
         norm = __norm_op(reshape, dim=0, block=block)
         __reshape_op(norm, out=out, shape=out_shape, block=block)
     else:
         perm = list(range(len(x.shape)))  # a list, so the two indices below can be swapped
         perm[0], perm[dim] = dim, 0
         transpose = __transpose_op(x, perm, block=block)
         norm = __norm_op(transpose, dim=0, block=block)
         __transpose_op(norm, perm, out=out, block=block)
     return out
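__norm_except_dim reshapes or transposes so that the reduction always runs over every axis except dim, leaving one norm value per slice along dim. A NumPy sketch of the intended result under that reading (not the fluid implementation itself):

import numpy as np

def norm_except_dim(x, dim=None):
    if dim is None:
        return np.sqrt(np.sum(x ** 2))  # norm over all elements
    # Move dim to the front, collapse the rest, take one norm per slice,
    # then restore a shape that broadcasts against x.
    flat = np.moveaxis(x, dim, 0).reshape(x.shape[dim], -1)
    norms = np.sqrt(np.sum(flat ** 2, axis=1))
    shape = [1] * x.ndim
    shape[dim] = x.shape[dim]
    return norms.reshape(shape)

w = np.random.rand(4, 3, 5)
print(norm_except_dim(w, dim=0).shape)  # (4, 1, 1)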
Example #5
    def _add_accumulator(self,
                         name,
                         param,
                         dtype=None,
                         fill_value=0.0,
                         shape=None):
        """Utility function to add an accumulator for a parameter

        Args:
            name: name of the accumulator
            param: parameter variable for which the accumulator is to be added
            dtype: data type of the accumulator variable
            fill_value: value used to initialize the accumulator variable
            shape: shape of the accumulator variable; defaults to param.shape
        """
        if (name in self._accumulators
                and param.name in self._accumulators[name]):
            raise Exception(
                "Accumulator {} already exists for parameter {}".format(
                    name, param.name))
        if shape is None:
            shape = param.shape
        assert isinstance(self.helper, LayerHelper)
        var = self.helper.create_global_variable(
            name=unique_name.generate(name),
            persistable=True,
            dtype=dtype or param.dtype,
            type=param.type,
            shape=shape)
        self.helper.set_variable_initializer(
            var, initializer=Constant(value=float(fill_value)))
        self._accumulators[name][param.name] = var
        return var
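The last assignment only works if self._accumulators is a two-level mapping that creates the inner dict on demand, keyed by accumulator name first and parameter name second. A minimal illustration of that structure with hypothetical names:

from collections import defaultdict

# accumulator name -> parameter name -> accumulator variable
_accumulators = defaultdict(dict)
_accumulators['moment1']['fc_0.w_0'] = 'moment1_acc_for_fc_0.w_0'

# Later lookups simply index the same two keys.
print(_accumulators['moment1']['fc_0.w_0'])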
Example #6
 def __transpose_op(x,
                    axis,
                    out=None,
                    block=self.startup_program.global_block()):
     if out is None:
         out = block.create_var(
             name=unique_name.generate(".".join(
                 [self.name, 'weight_norm_transpose'])),
             dtype=dtype,
             persistable=False)
     block.append_op(
         type='transpose',
         inputs={'X': x},
         outputs={'Out': out},
         attrs={'axis': axis})
     return out
Example #7
    def _create_accumulators(self, block, parameters):
        # Create beta1 power accumulator tensor
        beta_shape = [1]
        self._beta1_pow_acc = self.helper.create_global_variable(
            name=unique_name.generate('beta1_pow_acc'),
            dtype='float32',
            shape=beta_shape,
            lod_level=0,
            persistable=True)
        self.helper.set_variable_initializer(self._beta1_pow_acc,
                                             initializer=Constant(self._beta1))

        # Create accumulator tensors for first moment and infinity norm
        for p in parameters:
            self._add_accumulator(self._moment_acc_str, p)
            self._add_accumulator(self._inf_norm_acc_str, p)
Example #8
def _rename_grad_(block, start_op_idx, grad_to_var, target_grad_map):
    var_map = copy.copy(target_grad_map)
    for op_idx in range(start_op_idx, block.desc.op_size()):
        op_desc = block.desc.op(op_idx)
        for name in op_desc.input_arg_names():
            if name in var_map:
                op_desc.rename_input(name, var_map[name])

        for name in op_desc.output_arg_names():
            if block.desc.find_var(name.encode("ascii")):
                new_name = unique_name.generate(name)
                op_desc.rename_output(name, new_name)
                var_map[name] = new_name

    for g, ng in var_map.items():
        if g in grad_to_var:
            grad_to_var[ng] = grad_to_var[g]
            grad_to_var.pop(g)
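_rename_grad_ does two rounds of bookkeeping: while walking the ops it renames inputs that already have a mapping and gives every re-defined output a fresh unique name, then it rewrites grad_to_var so its keys follow the new names. The final remapping step, isolated as a plain-dict sketch with made-up names:

var_map = {'x@GRAD': 'x@GRAD_0'}   # old gradient name -> renamed gradient name
grad_to_var = {'x@GRAD': 'x'}      # gradient name -> the forward variable it belongs to

for g, ng in list(var_map.items()):
    if g in grad_to_var:
        grad_to_var[ng] = grad_to_var.pop(g)

print(grad_to_var)  # {'x@GRAD_0': 'x'}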
Example #9
    def _create_state(self, suffix, dtype, shape):
        """
        Create state variable.

        Args:
            suffix(str): the state suffix.
            dtype(str|core.VarDesc.VarType): the state data type
            shape(tuple|list): the shape of state

        Returns: State variable

        """
        state = self.helper.create_variable(
            name="_".join([unique_name.generate(self.helper.name), suffix]),
            persistable=True,
            dtype=dtype,
            shape=shape)
        self.states.append(state)
        return state
Example #10
    def _create_global_learning_rate(self):
        lr = self.global_learning_rate()

        if isinstance(lr, framework.Variable):
            return
        else:
            if not isinstance(self._learning_rate, float):
                raise TypeError(
                    "learning rate variable is create outside optimizer,"
                    "can not create new learning rate variable for new program"
                )

        # create learning rate in the current main program
        self._learning_rate_map[
            framework.default_main_program()] = layers.create_global_var(
                name=unique_name.generate("learning_rate"),
                shape=[1],
                value=float(self._learning_rate),
                dtype='float32',
                persistable=True)
Example #11
    def __init__(self,
                 average_window_rate,
                 params_grads=None,
                 min_average_window=10000,
                 max_average_window=10000,
                 **kwargs):
        super(ModelAverage, self).__init__(0.0, **kwargs)
        self.average_window = average_window_rate
        self.min_average_window = min_average_window
        self.max_average_window = max_average_window

        self.params_grads = [] if params_grads is None else params_grads
        params = {}
        for param, grad in self.params_grads:
            if param.do_model_average != False:
                params[param.name] = (param, grad)
        for param in framework.default_main_program().global_block(
        ).all_parameters():
            if param.name not in params and param.do_model_average != False:
                grad = param.block.create_var(
                    name=unique_name.generate(".".join([param.name, 'tmp'])),
                    dtype=param.dtype,
                    persistable=False,
                    stop_gradient=True)
                params[param.name] = (param, grad)
        self.params_grads = params.values()

        for param, grad in self.params_grads:
            self._append_average_accumulate_op(param)

        self.apply_program = Program()
        block = self.apply_program.global_block()
        with program_guard(main_program=self.apply_program):
            for param_grad in self.params_grads:
                self._add_average_apply_op(block, param_grad)

        self.restore_program = Program()
        block = self.restore_program.global_block()
        with program_guard(main_program=self.restore_program):
            for param_grad in self.params_grads:
                self._add_average_restore_op(block, param_grad)
Example #12
    def create_state(self, suffix, dtype, shape):
        """
        Create state variable.

        NOTE: It is not a public API.

        Args:
            suffix(str): the state suffix.
            dtype(str|core.VarDesc.VarType): the state data type
            shape(tuple|list): the shape of state

        Returns: State variable

        """
        state = self.helper.create_variable(
            name="_".join([unique_name.generate(self.helper.name), suffix]),
            persistable=True,
            dtype=dtype,
            shape=shape)
        self.states.append(state)
        return state
Example #13
    def __init__(self,
                 block,
                 type=core.VarDesc.VarType.LOD_TENSOR,
                 name=None,
                 shape=None,
                 dtype=None,
                 lod_level=None,
                 capacity=None,
                 persistable=None,
                 error_clip=None,
                 stop_gradient=False,
                 **kwargs):
        self.block = block
        self.error_clip = error_clip

        if name is None:
            name = unique_name.generate('_generated_var')
        is_new_var = False
        self.desc = self.block.desc.find_var(name)

        if self.desc is None:
            self.desc = self.block.desc.var(name)
            is_new_var = True

        if is_new_var:
            self.desc.set_type(type)
        elif self.desc.type() != type:
            raise ValueError("Variable {0} has been created before. The "
                             "previous type is {1}; the new type is {2}. They"
                             " are not matched".format(self.name,
                                                       self.desc.type(), type))

        if shape is not None:
            if is_new_var:
                self.desc.set_shape(shape)
            else:
                old_shape = self.shape
                shape = tuple(shape)
                if shape != old_shape:
                    raise ValueError(
                        "Variable {0} has been created before. the previous "
                        "shape is {1}; the new shape is {2}. They are not "
                        "matched.".format(self.name, old_shape, shape))
        if dtype is not None:
            if not isinstance(dtype, core.VarDesc.VarType):
                dtype = convert_np_dtype_to_dtype_(dtype)
            if is_new_var:
                self.desc.set_dtype(dtype)
            else:
                old_dtype = self.dtype
                if dtype != old_dtype:
                    raise ValueError("Variable {0} has been created before. "
                                     "The previous data type is {1}; the new "
                                     "data type is {2}. They are not "
                                     "matched.".format(self.name, old_dtype,
                                                       dtype))

        if lod_level is not None:
            if is_new_var:
                self.desc.set_lod_level(lod_level)
            else:
                if lod_level != self.lod_level:
                    raise ValueError("Variable {0} has been created before. "
                                     "The previous lod_level is {1}; the new "
                                     "lod_level is {2}. They are not "
                                     "matched".format(self.name,
                                                      self.lod_level,
                                                      lod_level))
        if persistable is not None:
            if is_new_var:
                self.desc.set_persistable(persistable)
            else:
                if persistable != self.persistable:
                    raise ValueError(
                        "Variable {0} has been created before."
                        "The previous persistable is {1}; the new "
                        "persistable is {2}. They are not matched".format(
                            self.name, self.persistable, persistable))

        if capacity is not None:
            if is_new_var:
                self.desc.set_capacity(capacity)
            else:
                # TODO(abhinavarora): Compare with set capacity once
                # get_capacity is implemented.
                pass

        self.block.vars[name] = self
        self.op = None
        self.stop_gradient = stop_gradient
Example #14
 def create_tmp_variable(self, dtype, stop_gradient=False):
     return self.main_program.current_block().create_var(
         name=unique_name.generate(".".join([self.name, 'tmp'])),
         dtype=dtype,
         persistable=False,
         stop_gradient=stop_gradient)
Example #15
 def __init__(self, layer_type, **kwargs):
     self.kwargs = kwargs
     self.layer_type = layer_type
     name = self.kwargs.get('name', None)
     if name is None:
         self.kwargs['name'] = unique_name.generate(self.layer_type)
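As in the other examples, the fallback name here is just the layer type run through unique_name.generate, so every unnamed layer gets a readable, collision-free default while an explicit name is left untouched. A small, hypothetical sketch of that effect (DemoHelper below is illustrative, not part of fluid):

import paddle.fluid as fluid

class DemoHelper(object):
    """Illustrative stand-in for the naming logic above."""
    def __init__(self, layer_type, **kwargs):
        self.kwargs = kwargs
        self.layer_type = layer_type
        if self.kwargs.get('name') is None:
            self.kwargs['name'] = fluid.unique_name.generate(layer_type)

print(DemoHelper('fc').kwargs['name'])                # e.g. 'fc_0'
print(DemoHelper('fc').kwargs['name'])                # e.g. 'fc_1'
print(DemoHelper('fc', name='my_fc').kwargs['name'])  # 'my_fc'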