Example #1
 def __init__(self, optimizer, epsilon=1e-05, coefficient=0.001, use_clip=False,
              lars_filter=lambda x: 'LayerNorm' not in x.name and 'bias' not in x.name):
     super(LARS, self).__init__(0.0, [Parameter(Tensor(0.0), name="fake_param")])
     _check_param_value(optimizer, epsilon, coefficient, use_clip, self.cls_name)
     self.opt = optimizer
     self.lars = P.LARSUpdate(epsilon, coefficient, use_clip)
     self.cast = P.Cast()
     self.parameters = optimizer.parameters
     if use_clip is True:
         self.learning_rate = optimizer.learning_rate
         self.dynamic_lr = optimizer.dynamic_lr
         self.gather = optimizer.gather
         self.assignadd = optimizer.assignadd
         self.global_step = optimizer.global_step
     else:
         self.learning_rate = Parameter(Tensor(0.0, dtype=mstype.float32), name="fake_lr")
     self.reciprocal_scale = optimizer.reciprocal_scale
     optimizer.reciprocal_scale = 1.0
     self.is_group = optimizer.is_group
     if self.is_group:
         self.weight_decay = tuple(map(lambda x: x / optimizer.loss_scale, optimizer.weight_decay))
     else:
         self.weight_decay = optimizer.weight_decay / optimizer.loss_scale
     optimizer.exec_weight_decay = False
     optimizer.weight_decay = 0.0
     self.decay_flags = optimizer.decay_flags
     self.lars_flag = tuple(lars_filter(x) for x in self.parameters)
     self.hyper_map = C.HyperMap()
Example #2
    def __init__(self, optimizer, epsilon=1e-05, coefficient=0.001, use_clip=False,
                 lars_filter=lambda x: 'LayerNorm' not in x.name and 'bias' not in x.name):
        super(LARS, self).__init__(0.0, [Parameter(Tensor(0.0), name="fake_param")])
        _check_param_value(optimizer, epsilon, coefficient, use_clip, self.cls_name)
        self.opt = optimizer
        self.parameters = optimizer.parameters
        self.use_clip = use_clip
        self.lars_flag = tuple(lars_filter(x) for x in self.parameters)
        self.is_group = optimizer.is_group
        self.learning_rate = Parameter(Tensor(0.0, dtype=mstype.float32), name="fake_lr")
        self.decay_flags = optimizer.decay_flags
        self.reciprocal_scale = optimizer.reciprocal_scale
        self.hyper_map = C.HyperMap()
        self.lars = P.LARSUpdate(epsilon, coefficient, use_clip)
        self.cast = P.Cast()

        if use_clip:
            self.is_group_lr = optimizer.is_group_lr
            self.dynamic_lr = optimizer.dynamic_lr
            self.origin_learning_rate = optimizer.learning_rate
            self.global_step = optimizer.global_step
            if self.is_group_lr and self.dynamic_lr:
                raise ValueError('Grouped dynamic learning rate is currently not supported for the inputs optimizer ' \
                                 'of lars.')

        if self.is_group:
            self.weight_decay = tuple(map(lambda x: x / optimizer.loss_scale, optimizer.weight_decay))
            optimizer.weight_decay = tuple(map(lambda x: 0.0, optimizer.weight_decay))
        else:
            self.weight_decay = optimizer.weight_decay / optimizer.loss_scale
            optimizer.weight_decay = 0.0

        optimizer.decay_flags = tuple(map(lambda x: False, self.decay_flags))
        optimizer.reciprocal_scale = 1.0
        optimizer.exec_weight_decay = False
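A minimal usage sketch (not part of the example above; the tiny network and hyper-parameter values are assumptions): the LARS class wraps another optimizer such as Momentum.
import mindspore.nn as nn

net = nn.Dense(16, 10)  # hypothetical stand-in network
inner = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
# The default lars_filter above skips LayerNorm and bias parameters.
optimizer = nn.LARS(inner, epsilon=1e-05, coefficient=0.001, use_clip=False)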
Example #3
def test_bprop_first_only():
    grads = bprop(
        Net(),
        Tensor(np.ones([2, 3]).astype(np.float32)),
        Tensor(np.ones([3, 2]).astype(np.float32)),
        grads_wrt_outputs=(Tensor(np.ones([2, 3]).astype(np.float32)),
                           Tensor(np.ones([2, 2]).astype(np.float32))))
    print(grads)
Example #4
def test_bprop_wrt_inputs_and_params():
    net = Net()
    grads = bprop(net, Tensor(np.ones([2, 3]).astype(np.float32)), Tensor(np.ones([3, 2]).astype(np.float32)),
                  grads_wrt_outputs=(Tensor(np.ones([2, 3]).astype(np.float32)),
                                     Tensor(np.ones([2, 2]).astype(np.float32))),
                  wrt=['inputs', 'params'],
                  params=net.trainable_params())
    print(grads)
Example #5
def test_bprop_wrt_params_no_sens():
    net = Net()
    grads = bprop(net,
                  Tensor(np.ones([2, 3]).astype(np.float32)),
                  Tensor(np.ones([3, 2]).astype(np.float32)),
                  wrt=['params'],
                  params=net.trainable_params())
    print(grads)
Example #6
def test_bprop_sens():
    grads = bprop(
        Net(),
        Tensor(np.ones([2, 3]).astype(np.float32)),
        Tensor(np.ones([3, 2]).astype(np.float32)),
        grads_wrt_outputs=(Tensor(np.ones([2, 3]).astype(np.float32)),
                           Tensor(np.ones([2, 2]).astype(np.float32))),
        wrt=['inputs'])
    print(grads)
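The Net and bprop used by these tests are local test helpers not shown here. The following is only a shape-compatible guess at what such a two-output Net could look like, inferred from the (2, 3) and (3, 2) inputs and the (2, 3) and (2, 2) sensitivities passed above.
import numpy as np
import mindspore.nn as nn
from mindspore import Tensor, Parameter
from mindspore.ops import operations as P

class Net(nn.Cell):
    # Hypothetical two-output Cell matching the test shapes.
    def __init__(self):
        super(Net, self).__init__()
        self.matmul = P.MatMul()
        self.w = Parameter(Tensor(np.ones([2, 3]).astype(np.float32)), name='w')

    def construct(self, x, y):
        out1 = x * self.w          # element-wise, shape (2, 3)
        out2 = self.matmul(x, y)   # (2, 3) x (3, 2) -> (2, 2)
        return out1, out2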
Example #7
def test_create_primitive_object_on_construct_use_kwargs():
    """ test_create_primitive_object_on_construct_use_kwargs """
    log.debug("begin test_create_primitive_object_on_construct_use_kwargs")
    context.set_context(mode=context.GRAPH_MODE)
    x = Tensor(np.array([[0, 1], [2, 1]]).astype(np.float32))
    y = Tensor(np.array([[0, 1], [2, 1]]).astype(np.float32))
    net = NetD()
    net(x, y)
    log.debug("finished test_create_primitive_object_on_construct_use_kwargs")
Example #8
def test_create_primitive_object_on_construct():
    """ test_create_primitive_object_on_construct """
    log.debug("begin test_create_object_on_construct")
    x = Tensor(np.array([[1, 2, 3], [1, 2, 3]], np.float32))
    y = Tensor(np.array([[2, 3, 4], [1, 1, 2]], np.float32))

    net = Net1()
    net.construct(x, y)
    log.debug("finished test_create_object_on_construct")
Example #9
 def __init__(self, params, accum=0.1, learning_rate=0.001, l1=0.0, l2=0.0,
              use_locking=False, loss_scale=1.0, weight_decay=0.0):
     super(ProximalAdagrad, self).__init__(learning_rate, params, weight_decay, loss_scale)
     _check_param_value(accum, l1, l2, use_locking, self.cls_name)
     self.accum = self.parameters.clone(prefix="accum", init=accum)
     self.l1 = Tensor(l1, mstype.float32)
     self.l2 = Tensor(l2, mstype.float32)
     self.hyper_map = C.HyperMap()
     self.opt = P.ApplyProximalAdagrad(use_locking=use_locking)
     self.sparse_opt = P.FusedSparseProximalAdagrad(use_locking=use_locking)
Example #10
 def __init__(self, params, accum=0.1, learning_rate=0.001, l1=0.0, l2=0.0,
              use_locking=False, loss_scale=1.0, weight_decay=0.0):
     super(ProximalAdagrad, self).__init__(learning_rate, params, weight_decay, loss_scale)
     if self.is_group:
         raise RuntimeError(f"The {self.cls_name} optimizer cannot support group setting.")
     _check_param_value(accum, l1, l2, use_locking, self.cls_name)
     self.accum = self.parameters.clone(prefix="accum", init=accum)
     self.l1 = Tensor(l1, mstype.float32)
     self.l2 = Tensor(l2, mstype.float32)
     self.weight_decay = weight_decay
     self.hyper_map = C.HyperMap()
     self.opt = P.ApplyProximalAdagrad(use_locking=use_locking)
     self.sparse_opt = inner.SparseApplyProximalAdagradNoReturn(use_locking=use_locking)
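A minimal instantiation sketch for the ProximalAdagrad constructor above; the one-layer network is a hypothetical stand-in.
import mindspore.nn as nn

net = nn.Dense(16, 10)  # hypothetical stand-in network
optimizer = nn.ProximalAdagrad(net.trainable_params(), accum=0.1,
                               learning_rate=0.001, l1=0.0, l2=0.0)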
Example #11
 def __init__(self,
              rgb_range,
              rgb_mean=(0.4488, 0.4371, 0.4040),
              rgb_std=(1.0, 1.0, 1.0),
              sign=-1):
     super(MeanShift, self).__init__(3, 3, kernel_size=1)
     self.reshape = P.Reshape()
     self.eye = P.Eye()
     std = Tensor(rgb_std, mstype.float32)
     self.weight.set_data(
         self.reshape(self.eye(3, 3, mstype.float32), (3, 3, 1, 1)) / self.reshape(std, (3, 1, 1, 1)))
     self.weight.requires_grad = False
     self.bias = Parameter(
         sign * rgb_range * Tensor(rgb_mean, mstype.float32) / std, name='bias', requires_grad=False)
     self.has_bias = True
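A brief usage sketch, assuming the EDSR-style convention that sign=-1 subtracts the RGB mean and sign=+1 adds it back; neither line comes from the example itself.
sub_mean = MeanShift(rgb_range=255, sign=-1)  # assumed usage: subtract the dataset mean
add_mean = MeanShift(rgb_range=255, sign=1)   # assumed usage: add it back after the backbone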
Example #12
    def __init__(self,
                 params,
                 initial_accum=0.1,
                 learning_rate=0.001,
                 lr_power=-0.5,
                 l1=0.0,
                 l2=0.0,
                 use_locking=False,
                 loss_scale=1.0,
                 weight_decay=0.0):
        super(FTRL, self).__init__(learning_rate, params)

        _check_param(initial_accum, learning_rate, lr_power, l1, l2,
                     use_locking, loss_scale, weight_decay, self.cls_name)
        self.moments = self.parameters.clone(prefix="moments",
                                             init=initial_accum)
        self.linear = self.parameters.clone(prefix="linear", init='zeros')
        self.l1 = l1
        self.l2 = l2
        self.lr_power = lr_power
        self.reciprocal_scale = 1.0 / loss_scale
        self.weight_decay = weight_decay
        self.decay_tf = tuple(True for _ in self.parameters)
        self.hyper_map = C.HyperMap()
        self.opt = P.ApplyFtrl(use_locking=use_locking)
        self.one = Tensor(1, mstype.int32)
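A minimal instantiation sketch for the FTRL constructor above; the one-layer network is a hypothetical stand-in.
import mindspore.nn as nn

net = nn.Dense(16, 10)  # hypothetical stand-in network
optimizer = nn.FTRL(net.trainable_params(), initial_accum=0.1,
                    learning_rate=0.001, lr_power=-0.5, l1=0.0, l2=0.0)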
Example #13
 def __init__(self, params, learning_rate, momentum, weight_decay=0.0, loss_scale=1.0,
              decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in x.name):
     super(Momentum, self).__init__(learning_rate, params)
     if isinstance(momentum, float) and momentum < 0.0:
         raise ValueError("momentum should be at least 0.0, but got momentum {}".format(momentum))
     if isinstance(learning_rate, Iterable) or \
             (isinstance(learning_rate, Tensor) and learning_rate.dim() == 1):
         self.dynamic_lr = True
         self.gather = P.GatherV2()
         self.assignadd = P.AssignAdd()
         self.global_step = Parameter(initializer(0, [1], mstype.int32), name="global_step")
         self.axis = 0
     else:
         self.dynamic_lr = False
         self.gather = None
         self.assignadd = None
         self.global_step = None
         self.axis = None
     self.momentum = Parameter(momentum, name="momentum")
     self.params = self.parameters
     self.moments = self.params.clone(prefix="moments", init='zeros')
     self.decay_tf = tuple(decay_filter(x) for x in self.parameters)
     self.hyper_map = C.HyperMap()
     self.opt = P.ApplyMomentum()
     self.weight_decay = weight_decay * loss_scale
     self.reciprocal_scale = 1.0 / loss_scale
     self.one = Tensor(1, mstype.int32)
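A minimal instantiation sketch for the Momentum constructor above; the network and hyper-parameter values are assumptions.
import mindspore.nn as nn

net = nn.Dense(16, 10)  # hypothetical stand-in network
optimizer = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9,
                        weight_decay=1e-4, loss_scale=1.0)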
Example #14
 def __init__(self,
              optimizer,
              epsilon=1e-05,
              hyperpara=0.001,
              weight_decay=0.0,
              use_clip=False,
              decay_filter=lambda x: 'LayerNorm' not in x.name and 'bias' not in x.name,
              lars_filter=lambda x: 'LayerNorm' not in x.name and 'bias' not in x.name,
              loss_scale=1.0):
     super(LARS, self).__init__(0.0,
                                [Parameter(Tensor(0.0), name="trivial")])
     self.opt = optimizer
     self.parameters = optimizer.parameters
     self.learning_rate = optimizer.learning_rate
     self.lars = P.LARSUpdate(epsilon, hyperpara, use_clip)
     self.reciprocal_scale = 1.0 / loss_scale
     self.weight_decay = weight_decay * loss_scale
     self.cast = P.Cast()
     self.decay_flag = tuple(decay_filter(x) for x in self.parameters)
     self.lars_flag = tuple(lars_filter(x) for x in self.parameters)
     self.hyper_map = C.HyperMap()
     self.dynamic_lr = False
     self.gather = None
     self.global_step = None
     self.axis = None
     if isinstance(self.learning_rate.default_input, Iterable) or \
             (isinstance(self.learning_rate.default_input, Tensor) and self.learning_rate.default_input.dim() == 1):
         self.dynamic_lr = True
         self.assignadd = P.AssignAdd()
         self.gather = P.GatherV2()
         self.global_step = Parameter(initializer(0, [1], mstype.int32),
                                      name="lars_global_step")
         self.axis = 0
Example #15
    def __init__(self, opt, train_parameter_groups=None, train_strategy=None):
        super(FreezeOpt, self).__init__()
        if not isinstance(opt, Optimizer):
            raise TypeError(
                f"The first arg 'opt' must be an Optimizer instance, but got {type(opt)}"
            )
        if train_strategy is not None and train_parameter_groups is None:
            raise ValueError(
                "When the 'train_strategy' is specified, the value of 'train_parameter_groups' "
                "must also be specified")
        opt_class = type(opt)
        opt_init_args = opt.init_args
        self.opts = []

        if train_parameter_groups is None:
            groups_num = 10
            step = 6
            parameters = opt.parameters
            para_groups = (parameters[(i * step):] for i in range(groups_num))
            self.opts = [
                opt_class(params=params, **opt_init_args)
                for params in para_groups
            ]
        else:
            if not isinstance(train_parameter_groups, (tuple, list)):
                raise TypeError(
                    "The specified 'train_parameter_groups' should be tuple or list"
                )
            for params in train_parameter_groups:
                if not isinstance(params, (tuple, list)):
                    raise TypeError(
                        "The each element of 'train_parameter_groups' should be tuple or list "
                        "to store the Parameter")
                for para in params:
                    if not isinstance(para, Parameter):
                        raise TypeError(
                            "The element of each group should be the Parameter"
                        )

                # generate one-to-one opt corresponding to the parameter group
                self.opts.append(opt_class(params=params, **opt_init_args))

        if isinstance(train_strategy, (tuple, list)):
            for ele in train_strategy:
                if not isinstance(ele, int):
                    raise ValueError(
                        "The element in train_strategy should be int number")
            self.train_strategy = Tensor(train_strategy, mstype.int32)
        elif isinstance(train_strategy, Tensor):
            if train_strategy.ndim != 1 or train_strategy.dtype != mstype.int32:
                raise ValueError(
                    "When train_strategy is a Tensor, the dimension should be 1 and "
                    "the dtype should be int32")
            self.train_strategy = train_strategy
        elif train_strategy is None:
            self.train_strategy = None
        else:
            raise TypeError(
                "The specified 'train_strategy' should be None, tuple, list or Tensor"
            )
Example #16
def test_create_primitive_object_on_construct_use_args_and_kwargs():
    """ test_create_primitive_object_on_construct_use_args_and_kwargs """
    log.debug("begin test_create_primitive_object_on_construct_use_args_and_kwargs")
    context.set_context(mode=context.GRAPH_MODE)
    inputs = Tensor(np.ones([1, 16, 16, 16]).astype(np.float32))
    net = NetE()
    net(inputs)
    log.debug("finished test_create_primitive_object_on_construct_use_args_and_kwargs")
Example #17
    def __init__(self, max_position_embeddings, embedding_dim, seq_length):
        super(LearnedPositionalEncoding, self).__init__()
        self.pe = nn.Embedding(
            max_position_embeddings, embedding_dim)
        self.seq_length = seq_length

        self.position_ids = Tensor(np.arange(self.seq_length).astype(np.int32))
        self.reshape = P.Reshape()
        self.position_ids = self.reshape(
            self.position_ids, (1, self.seq_length))
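The construct method is not shown above; a plausible forward pass, written here only as an assumption, would add the learned position embedding to the input features.
    def construct(self, x):
        # Assumed forward pass: look up embeddings for positions 0..seq_length-1
        # (position_ids has shape (1, seq_length)) and add them to the input,
        # which is expected to have shape (batch, seq_length, embedding_dim).
        position_embeddings = self.pe(self.position_ids)
        return x + position_embeddings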
Example #18
    def __init__(self,
                 params,
                 learning_rate=0.1,
                 decay=0.9,
                 momentum=0.0,
                 epsilon=1e-10,
                 use_locking=False,
                 centered=False,
                 loss_scale=1.0,
                 weight_decay=0.0,
                 decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in
                 x.name):
        super(RMSProp, self).__init__(learning_rate, params)

        if isinstance(momentum, float) and momentum < 0.0:
            raise ValueError(
                "momentum should be at least 0.0, but got momentum {}".format(
                    momentum))

        if decay < 0.0:
            raise ValueError(
                "decay should be at least 0.0, but got dampening {}".format(
                    decay))
        self.decay = decay
        self.epsilon = epsilon

        validator.check_type("use_locking", use_locking, [bool])
        validator.check_type("centered", centered, [bool])
        self.centered = centered
        if centered:
            self.opt = P.ApplyCenteredRMSProp(use_locking)
            self.mg = self.parameters.clone(prefix="mean_grad", init='zeros')
        else:
            self.opt = P.ApplyRMSProp(use_locking)

        self.dynamic_lr = False
        if not isinstance(learning_rate, float):
            self.dynamic_lr = True
            self.gather = P.GatherV2()
            self.assignadd = P.AssignAdd()
            self.global_step = Parameter(initializer(0, [1], mstype.int32),
                                         name="global_step")
            self.axis = 0
            self.one = Tensor(1, mstype.int32)

        self.momentum = momentum

        self.ms = self.parameters.clone(prefix="mean_square", init='zeros')
        self.moment = self.parameters.clone(prefix="moment", init='zeros')
        self.hyper_map = C.HyperMap()

        self.decay = decay
        self.decay_tf = tuple(decay_filter(x) for x in self.parameters)
        self.reciprocal_scale = 1.0 / loss_scale
        self.weight_decay = weight_decay * loss_scale
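A minimal instantiation sketch for the RMSProp constructor above; the network and hyper-parameter values are assumptions.
import mindspore.nn as nn

net = nn.Dense(16, 10)  # hypothetical stand-in network
optimizer = nn.RMSProp(net.trainable_params(), learning_rate=0.01, decay=0.9,
                       momentum=0.9, epsilon=1e-10, centered=False)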
Example #19
def test_create_cell_object_on_construct():
    """ test_create_cell_object_on_construct """
    log.debug("begin test_create_object_on_construct")
    context.set_context(mode=context.GRAPH_MODE)
    np1 = np.random.randn(2, 3, 4, 5).astype(np.float32)
    input_me = Tensor(np1)

    net = Net()
    output = net(input_me)
    out_me1 = output.asnumpy()
    print(np1)
    print(out_me1)
    log.debug("finished test_create_object_on_construct")
Example #20
 def compute(self, x):
     """ compute"""
     NumBlock_x = self.NumBlock_x
     NumBlock_y = self.NumBlock_y
     large_x = self.fold(x)
     large_x = large_x.asnumpy()
     N, C, _, _ = large_x.shape
     leftup_idx_x = []
     leftup_idx_y = []
     for i in range(NumBlock_x):
         leftup_idx_x.append(i * self.kernel_size[0])
     for i in range(NumBlock_y):
         leftup_idx_y.append(i * self.kernel_size[1])
     fold_x = np.zeros((N, C, (NumBlock_x - 1) * self.stride + self.kernel_size[0], \
                                       (NumBlock_y - 1) * self.stride + self.kernel_size[1]), dtype=np.float32)
     for i in range(NumBlock_x):
         for j in range(NumBlock_y):
             fold_i = i * self.stride
             fold_j = j * self.stride
             org_i = leftup_idx_x[i]
             org_j = leftup_idx_y[j]
             fills = large_x[:, :, org_i:org_i + self.kernel_size[0], org_j:org_j + self.kernel_size[1]]
             t2 = fold_x[:, :, :fold_i, fold_j:fold_j + self.kernel_size[1]]
             zeros2 = np.zeros(t2.shape)
             concat1 = np.concatenate((zeros2, fills), axis=2)
             t3 = fold_x[:, :, fold_i + self.kernel_size[0]:, fold_j:fold_j + self.kernel_size[1]]
             zeros3 = np.zeros(t3.shape)
             concat2 = np.concatenate((concat1, zeros3), axis=2)
             t1 = fold_x[:, :, :, :fold_j]
             zeros1 = np.zeros(t1.shape)
             concat3 = np.concatenate((zeros1, concat2), axis=3)
             t4 = fold_x[:, :, :, fold_j + self.kernel_size[1]:]
             zeros4 = np.zeros(t4.shape)
             concat4 = np.concatenate((concat3, zeros4), axis=3)
             fold_x += concat4
     y = Tensor(fold_x, mstype.float16)
     return y
Example #21
 def __init__(self,
              in_channels,
              out_channels,
              weight_init='normal',
              bias_init='zeros',
              has_bias=True):
     super(CommonHeadLastFN, self).__init__()
     weight_shape = [out_channels, in_channels]
     self.weight = Parameter(initializer(weight_init, weight_shape),
                             requires_grad=True,
                             name='weight')
     self.x_norm = P.L2Normalize(axis=1)
     self.w_norm = P.L2Normalize(axis=1)
     self.fc = P.MatMul(transpose_a=False, transpose_b=True)
     self.multiplier = Parameter(Tensor(np.ones([1]), mstype.float32),
                                 requires_grad=True,
                                 name='multiplier')
     self.has_bias = has_bias
     if self.has_bias:
         bias_shape = [out_channels]
         self.bias_add = P.BiasAdd()
         self.bias = Parameter(initializer(bias_init, bias_shape),
                               requires_grad=True,
                               name='bias')
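The construct method is not shown above; a forward pass consistent with the operators it creates (L2-normalized features against L2-normalized weight rows, scaled by the learned multiplier) might look like the following sketch.
 def construct(self, x):
     # Assumed forward pass: cosine-similarity style classification head.
     x = self.x_norm(x)                    # L2-normalize features along axis 1
     w = self.w_norm(self.weight)          # L2-normalize each weight row
     x = self.multiplier * self.fc(x, w)   # (N, in) x (out, in)^T -> (N, out)
     if self.has_bias:
         x = self.bias_add(x, self.bias)
     return x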
Example #22
 def compute(self, x):
     """stride"""
     x = x.asnumpy()
     N, C, H, W = x.shape
     leftup_idx_x = []
     leftup_idx_y = []
     nh = (H - self.kernel_size) // self.stride + 1
     nw = (W - self.kernel_size) // self.stride + 1
     for i in range(nh):
         leftup_idx_x.append(i * self.stride)
     for i in range(nw):
         leftup_idx_y.append(i * self.stride)
     NumBlock_x = len(leftup_idx_x)
     NumBlock_y = len(leftup_idx_y)
     unf_x = np.zeros((N, C, NumBlock_x * self.kernel_size, NumBlock_y * self.kernel_size), dtype=np.float32)
     N, C, H, W = unf_x.shape
     for i in range(NumBlock_x):
         for j in range(NumBlock_y):
             unf_i = i * self.kernel_size
             unf_j = j * self.kernel_size
             org_i = leftup_idx_x[i]
             org_j = leftup_idx_y[j]
             fills = x[:, :, org_i:org_i + self.kernel_size,
                       org_j:org_j + self.kernel_size]
             zeros2 = np.zeros(unf_x[:, :, :unf_i, unf_j:unf_j + self.kernel_size].shape)
             concat1 = np.concatenate((zeros2, fills), axis=2)
             zeros3 = np.zeros(unf_x[:, :, unf_i + self.kernel_size:, unf_j:unf_j + self.kernel_size].shape)
             concat2 = np.concatenate((concat1, zeros3), axis=2)
             zeros1 = np.zeros(unf_x[:, :, :, :unf_j].shape)
             concat3 = np.concatenate((zeros1, concat2), axis=3)
             zeros4 = np.zeros(unf_x[:, :, :, unf_j + self.kernel_size:].shape)
             concat4 = np.concatenate((concat3, zeros4), axis=3)
             unf_x += concat4
     unf_x = Tensor(unf_x, mstype.float16)
     y = self.unfold(unf_x)
     return y
Example #23
 def infer_value(self, x, dev_mat, tensor_map):
     from mindspore.parallel._tensor import _load_tensor
     validator.check_value_type("dev_mat", dev_mat, [tuple], self.name)
     validator.check_value_type("tensor_map", tensor_map, [tuple],
                                self.name)
     return Tensor(_load_tensor(x, dev_mat, tensor_map))
Example #24
 def __init__(self):
     super(NetE, self).__init__()
     self.w = Parameter(Tensor(np.ones([16, 16, 3, 3]).astype(np.float32)), name='w')
Example #25
 def infer_value(self, x_value):
     return Tensor(np.arange(self.start, self.limit, self.delta),
                   dtype=x_value.dtype)
Example #26
 def __init__(self, shape=None, mean=0.0, stddev=1.0, seed=0):
     super(Net, self).__init__()
     self._mean = Tensor(mean, mstype.float32)
     self._stddev = Tensor(stddev, mstype.float32)
     self._normal = P.Normal(seed=seed)
     self._shape = shape
Example #27
    def __init__(self,
                 q_tensor_width,
                 k_tensor_width,
                 v_tensor_width,
                 hidden_width,
                 out_tensor_width,
                 num_attention_heads=1,
                 query_act=None,
                 key_act=None,
                 value_act=None,
                 out_act=None,
                 has_attention_mask=True,
                 attention_probs_dropout_prob=0.0,
                 use_one_hot_embeddings=False,
                 initializer_range=0.02,
                 do_return_2d_tensor=False,
                 compute_type=mstype.float16,
                 same_dim=True):
        super(MultiheadAttention, self).__init__()
        self.num_attention_heads = num_attention_heads
        self.size_per_head = int(hidden_width / num_attention_heads)
        self.has_attention_mask = has_attention_mask
        self.use_one_hot_embeddings = use_one_hot_embeddings
        self.initializer_range = initializer_range
        self.do_return_2d_tensor = do_return_2d_tensor
        self.same_dim = same_dim

        self.scores_mul = Tensor(
            [1.0 / math.sqrt(float(self.size_per_head))], dtype=compute_type)
        self.reshape = P.Reshape()
        self.shape_q_2d = (-1, q_tensor_width)
        self.shape_k_2d = (-1, k_tensor_width)
        self.shape_v_2d = (-1, v_tensor_width)
        self.hidden_width = int(hidden_width)
        if self.same_dim:
            self.in_proj_layer = Parameter(Tensor(np.random.rand(hidden_width * 3,
                                                                 q_tensor_width), dtype=mstype.float32), name="weight")
        else:
            self.query_layer = nn.Dense(q_tensor_width,
                                        hidden_width,
                                        activation=query_act,
                                        has_bias=False).to_float(compute_type)
            self.key_layer = nn.Dense(k_tensor_width,
                                      hidden_width,
                                      activation=key_act,
                                      has_bias=False).to_float(compute_type)
            self.value_layer = nn.Dense(q_tensor_width,
                                        hidden_width,
                                        activation=value_act,
                                        has_bias=False).to_float(compute_type)
        self.out_proj = nn.Dense(hidden_width,
                                 out_tensor_width,
                                 activation=out_act,
                                 has_bias=False).to_float(compute_type)

        self.matmul_trans_b = P.BatchMatMul(transpose_b=True)
        self.multiply = P.Mul()
        self.transpose = P.Transpose()
        self.trans_shape = (0, 2, 1, 3)
        self.trans_shape_relative = (2, 0, 1, 3)
        self.trans_shape_position = (1, 2, 0, 3)
        self.multiply_data = Tensor([-10000.0,], dtype=compute_type)
        self.matmul = P.BatchMatMul()

        self.softmax = nn.Softmax()
        self.dropout = nn.Dropout(1. - attention_probs_dropout_prob)
        self.use_dropout = attention_probs_dropout_prob > 0

        if self.has_attention_mask:
            self.expand_dims = P.ExpandDims()
            self.sub = P.Sub()
            self.add = P.TensorAdd()
            self.cast = P.Cast()
            self.get_dtype = P.DType()

        self.softmax_cast = P.Cast()
        self.matmul_dense = P.MatMul(transpose_b=True)
        self.split = P.Split(0, 3)
        self.equal = P.Equal()
        self.shape = P.Shape()
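An instantiation sketch for the attention block above; the widths and head count are hypothetical and only need hidden_width to be divisible by num_attention_heads.
attention = MultiheadAttention(q_tensor_width=512,
                               k_tensor_width=512,
                               v_tensor_width=512,
                               hidden_width=512,       # must divide evenly by the head count
                               out_tensor_width=512,
                               num_attention_heads=8,
                               attention_probs_dropout_prob=0.1)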
Example #28
 def __init__(self):
     super(Net, self).__init__()
     self.matmul = P.MatMul()
     self.z = Parameter(Tensor(np.array([1.0], np.float32)), name='z')