Code Example #1
File: optimizer.py Project: wenming2014/mindspore
    def __init__(self,
                 learning_rate,
                 parameters,
                 weight_decay=0.0,
                 loss_scale=1.0):
        super(Optimizer, self).__init__(auto_prefix=False)
        if parameters and not isinstance(parameters, list):
            parameters = list(parameters)

        if not parameters:
            raise ValueError("Optimizer got an empty parameter list.")

        if not isinstance(parameters[0], (dict, Parameter)):
            raise ValueError(
                "Only a list of Parameter or dict is supported.")

        if isinstance(loss_scale, int):
            loss_scale = float(loss_scale)
        validator.check_value_type("loss_scale", loss_scale, [float], None)
        validator.check_number_range("loss_scale", loss_scale, 0.0,
                                     float("inf"), Rel.INC_NEITHER, None)

        if isinstance(weight_decay, int):
            weight_decay = float(weight_decay)
        validator.check_value_type("weight_decay", weight_decay, [float], None)
        validator.check_number_range("weight_decay", weight_decay, 0.0,
                                     float("inf"), Rel.INC_LEFT, None)

        self.is_group = False
        self.loss_scale = loss_scale
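        # A float learning_rate is treated as a fixed scalar; anything else
        # (e.g. a per-step schedule) enables the dynamic-lr machinery below.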
        if isinstance(learning_rate, float):
            self.dynamic_lr = False
            self.gather = None
            self.assignadd = None
            self.global_step = None
            self.scalar_lr = learning_rate
        else:
            self.dynamic_lr = True
            self.gather = P.GatherV2()
            self.assignadd = P.AssignAdd()
            self.global_step = Parameter(initializer(0, [1], mindspore.int32),
                                         name='global_step')
            self.scalar_lr = None

        learning_rate = self._get_single_lr(learning_rate)
        if isinstance(parameters[0], dict):
            self.is_group = True
            self.params = []
            self.group_lr = []
            self.group_weight_decay = []
            self._init_group_params(parameters, learning_rate, weight_decay)

        if self.is_group:
            self.learning_rate = ParameterTuple(self.group_lr)
            self.parameters = ParameterTuple(self.params)
            self.weight_decay = tuple(self.group_weight_decay)
            decay_filter = lambda x: x > 0
            self.decay_flags = tuple(
                decay_filter(x) for x in self.weight_decay)
        else:
            self.learning_rate = Parameter(learning_rate, name="learning_rate")
            self.parameters = ParameterTuple(parameters)
            self.weight_decay = weight_decay * loss_scale
            decay_filter = lambda x: 'beta' not in x.name and 'gamma' not in x.name
            self.decay_flags = tuple(decay_filter(x) for x in self.parameters)
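        # Gradients are later multiplied by 1/loss_scale to undo the loss scaling.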
        self.reciprocal_scale = 1.0 / loss_scale
        self.exec_weight_decay = any(self.decay_flags)
        self.param_length = len(self.parameters)
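
The constructor accepts either a flat list of Parameter objects or a list of dicts with per-group settings. A minimal sketch of the grouped form (the parameter split and the SomeOptimizer subclass name are hypothetical, not from the source):

    conv_params = [p for p in net.trainable_params() if 'conv' in p.name]
    other_params = [p for p in net.trainable_params() if 'conv' not in p.name]
    group_params = [{'params': conv_params, 'weight_decay': 0.01},
                    {'params': other_params}]
    optim = SomeOptimizer(learning_rate=0.1, parameters=group_params)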
Code Example #2
File: test_reshape.py Project: yrpang/mindspore
    def __init__(self, strategy0, strategy1, strategy2):
        super(ReshapeNet, self).__init__()
        self.relu = P.ReLU().shard(strategy0)
        self.reshape = P.Reshape().shard(strategy1)
        self.matmul = P.MatMul().shard(strategy2)
        self.matmul_weight = Parameter(Tensor(np.ones([25088, 256]), dtype=ms.float32), name="weight")
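
The parallel tests build this net with shard strategies sized to the device grid. A sketch assuming an 8-device data-parallel layout (the strategy values are illustrative, not taken from the source):

    net = ReshapeNet(strategy0=((8, 1),),
                     strategy1=((8, 1),),
                     strategy2=((8, 1), (1, 1)))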
Code Example #3
def merge_sliced_parameter(sliced_parameters, strategy=None):
    """
    Merge parameter slices to one whole parameter.

    Args:
        sliced_parameters (list[Parameter]): Parameter slices in order of rank_id.
        strategy (dict): Parameter slice strategy; defaults to None.
            If strategy is None, parameter slices are simply merged along axis 0 in rank order.

            - key (str): Parameter name.
            - value (<class 'node_strategy_pb2.ParallelLayouts'>): Slice strategy of this parameter.

    Returns:
        Parameter, the merged parameter which has the whole data.

    Raises:
        ValueError: Failed to merge.
        TypeError: The sliced_parameters has an incorrect type or strategy is not a dict.
        KeyError: The parameter name is not in keys of strategy.

    Examples:
        >>> strategy = build_searched_strategy("./strategy_train.ckpt")
        >>> sliced_parameters = [
        ...     Parameter(Tensor(np.array([0.00023915, 0.00013939, -0.00098059])),
        ...               "network.embedding_table"),
        ...     Parameter(Tensor(np.array([0.00015815, 0.00015458, -0.00012125])),
        ...               "network.embedding_table"),
        ...     Parameter(Tensor(np.array([0.00042165, 0.00029692, -0.00007941])),
        ...               "network.embedding_table"),
        ...     Parameter(Tensor(np.array([0.00084451, 0.00089960, -0.00010431])),
        ...               "network.embedding_table")]
        >>> merged_parameter = merge_sliced_parameter(sliced_parameters, strategy)
    """
    if not isinstance(sliced_parameters, list):
        raise TypeError(
            f"The sliced_parameters should be list, but got {type(sliced_parameters)}."
        )

    if not sliced_parameters:
        raise ValueError("The sliced_parameters should not be empty.")

    if strategy and not isinstance(strategy, dict):
        raise TypeError(
            f"The strategy should be dict, but got {type(strategy)}.")

    try:
        parameter_name = sliced_parameters[0].name
        parameter_shape = sliced_parameters[0].data.shape
        parameter_shape_length = len(parameter_shape)
    except BaseException as e:
        raise TypeError(
            f"{e.__str__()}. The element in sliced_parameters should be Parameter."
        )

    is_even = True
    for index, parameter in enumerate(sliced_parameters):
        if not isinstance(parameter, Parameter):
            raise TypeError(
                f"The element in sliced_parameters should be Parameter, "
                f"but got {type(parameter)} at index {index}.")

        if parameter.name != parameter_name \
                or len(parameter.data.shape) != parameter_shape_length \
                or parameter.data.shape[1:] != parameter_shape[1:]:
            raise ValueError(
                "Please make sure that the elements in sliced_parameters have the same name, "
                "dimension length and the same shape except for axis 0.")

        if parameter.data.shape != parameter_shape:
            is_even = False

    layerwise_parallel = sliced_parameters[0].layerwise_parallel
    requires_grad = sliced_parameters[0].requires_grad
    sliced_data = [parameter.data.asnumpy() for parameter in sliced_parameters]
    merged_parameter = None

    if not strategy:
        merged_tensor = Tensor(np.concatenate(sliced_data))
        merged_parameter = Parameter(merged_tensor, parameter_name,
                                     requires_grad, layerwise_parallel)

    else:
        if parameter_name not in strategy.keys():
            raise KeyError(
                f"The parameter name should be one key of strategy, "
                f"but the parameter name is {parameter_name}.")
        merged_tensor = _merge_param_with_strategy(sliced_data, parameter_name,
                                                   strategy, is_even)
        merged_parameter = Parameter(merged_tensor, parameter_name,
                                     requires_grad, layerwise_parallel)

    return merged_parameter
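
When strategy is None, the merge reduces to concatenation along axis 0 in rank order; a standalone NumPy illustration with made-up slice values:

    import numpy as np

    slices = [np.array([0.1, 0.2]), np.array([0.3, 0.4]), np.array([0.5, 0.6])]
    merged = np.concatenate(slices)  # concatenates along axis 0 by default
    print(merged)  # [0.1 0.2 0.3 0.4 0.5 0.6]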
Code Example #4
File: optimizer.py Project: fangbaohui/mindspore
    def __init__(self,
                 learning_rate,
                 parameters,
                 weight_decay=0.0,
                 loss_scale=1.0):
        super(Optimizer, self).__init__(auto_prefix=False)
        if parameters and not isinstance(parameters, list):
            parameters = list(parameters)

        if not parameters:
            raise ValueError("Optimizer got an empty parameter list.")

        if not isinstance(parameters[0], (dict, Parameter)):
            raise TypeError(
                "Only a list of Parameter or dict can be supported.")

        if isinstance(loss_scale, int):
            loss_scale = float(loss_scale)
        validator.check_value_type("loss_scale", loss_scale, [float],
                                   self.cls_name)
        validator.check_number_range("loss_scale", loss_scale, 0.0,
                                     float("inf"), Rel.INC_NEITHER,
                                     self.cls_name)

        if isinstance(weight_decay, int):
            weight_decay = float(weight_decay)
        validator.check_value_type("weight_decay", weight_decay, [float],
                                   self.cls_name)
        validator.check_number_range("weight_decay", weight_decay, 0.0,
                                     float("inf"), Rel.INC_LEFT, self.cls_name)

        self.is_group = False
        self.is_group_lr = False
        self.is_group_params_ordered = False
        self.loss_scale = loss_scale
        if isinstance(learning_rate, int):
            learning_rate = float(learning_rate)
        if isinstance(learning_rate, float):
            self.dynamic_lr = False
            self.gather = None
            self.assignadd = None
            self.global_step = None
            self.scalar_lr = learning_rate
        else:
            self.dynamic_lr = True
            self.gather = P.GatherV2()
            self.assignadd = P.AssignAdd()
            self.global_step = Parameter(initializer(0, [1], mindspore.int32),
                                         name='global_step')
            self.scalar_lr = None

        learning_rate = self._get_single_lr(learning_rate)
        if isinstance(parameters[0], dict):
            self.is_group = True
            self.group_params = []
            self.group_lr = []
            self.group_weight_decay = []
            self._init_group_params(parameters, learning_rate, weight_decay)

        if self.is_group_lr:
            self.learning_rate = ParameterTuple(self.group_lr)
        else:
            self.learning_rate = Parameter(learning_rate, name="learning_rate")

        if self.is_group:
            self.parameters = ParameterTuple(self.group_params)
            self.weight_decay = tuple(self.group_weight_decay)
            decay_filter = lambda x: x > 0
            self.decay_flags = tuple(
                decay_filter(x) for x in self.weight_decay)
        else:
            self.parameters = ParameterTuple(parameters)
            self.weight_decay = weight_decay * loss_scale
            decay_filter = lambda x: 'beta' not in x.name and 'gamma' not in x.name
            self.decay_flags = tuple(decay_filter(x) for x in self.parameters)
        self.reciprocal_scale = 1.0 / loss_scale
        self.exec_weight_decay = any(self.decay_flags)
        self.param_length = len(self.parameters)
        self.map_ = C.Map()
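        # Parallel optimizer segmentation: each parameter is assigned a rank by
        # _get_parameter_group_id(), and optim_filter marks the shard that this
        # device is responsible for updating.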

        use_parallel = auto_parallel_context().get_enable_parallel_optimizer()
        self.use_parallel = use_parallel
        if use_parallel:
            if self.cls_name not in [
                    "Lamb", "AdamWeightDecayDynamicLR", "AdamWeightDecay"
            ]:
                raise RuntimeError(
                    "Optimizer segmentation does not support optimizer {}".
                    format(self.cls_name))
            if _get_parallel_mode() not in [
                    ParallelMode.HYBRID_PARALLEL, ParallelMode.DATA_PARALLEL,
                    ParallelMode.AUTO_PARALLEL
            ]:
                raise RuntimeError(
                    "Optimizer segmentation does not support parallel mode {}".
                    format(_get_parallel_mode()))
            self.dev_num = _get_device_num()
            if self.dev_num > self.param_length:
                raise RuntimeError(
                    "Optimizer segmentation cannot be applied when the number of parameters {} is"
                    " less than the number of devices {}".format(
                        self.param_length, self.dev_num))
            self.param_rank = self._get_parameter_group_id()
            self.optim_filter = tuple(
                map(lambda x: x == _get_global_rank(), self.param_rank))
            self.param_names = []
            for param in self.parameters:
                self.param_names.append(param.name)
        else:
            self.optim_filter = (True, ) * self.param_length
Code Example #5
File: thor.py Project: huxian123/mindspore
    def __init__(self,
                 params,
                 learning_rate,
                 momentum,
                 matrix_A,
                 matrix_G,
                 A_inv_max,
                 G_inv_max,
                 weight_decay=0.0,
                 loss_scale=1.0,
                 decay_filter=lambda x: x.name not in []):
        super(THOR, self).__init__(learning_rate, params, weight_decay,
                                   loss_scale)
        if isinstance(momentum, float) and momentum < 0.0:
            raise ValueError(
                "momentum should be at least 0.0, but got momentum {}".format(
                    momentum))
        self.momentum = Parameter(Tensor(momentum, mstype.float32),
                                  name="momentum")
        self.params = self.parameters
        self.moments = self.params.clone(prefix="moments", init='zeros')
        self.hyper_map = C.HyperMap()
        self.opt = P.ApplyMomentum()
        self.matrix_A = ParameterTuple(matrix_A)
        self.matrix_G = ParameterTuple(matrix_G)
        self.A_inv_max = ParameterTuple(A_inv_max)
        self.G_inv_max = ParameterTuple(G_inv_max)
        self.cube_matmul_left = P.CusMatMulCubeFraczLeftCast()
        self.cube_matmul_left_fc = P.CusMatMulCubeDenseLeft()
        self.cube_matmul_right_fc = P.CusMatMulCubeDenseRight()
        self.cube_matmul_right_mul = P.CusMatMulCubeFraczRightMul()
        self.transpose = P.Transpose()
        self.shape = P.Shape()
        self.reshape = P.Reshape()
        self.mul = P.Mul()
        self.weight_idx = []
        for i in range(len(self.params)):
            if "conv" in self.params[i].name or "end_point" in self.params[
                    i].name:
                self.weight_idx.append(i)
        self.weight_idx.append(len(self.params))
        self.feature_map = [
            1.0 / 12544, 1.0 / 3136, 1.0 / 3136, 1.0 / 3136, 1.0 / 3136,
            1.0 / 3136, 1.0 / 3136, 1.0 / 3136, 1.0 / 3136, 1.0 / 3136,
            1.0 / 3136, 1.0 / 3136, 1.0 / 784, 1.0 / 784, 1.0 / 784, 1.0 / 784,
            1.0 / 784, 1.0 / 784, 1.0 / 784, 1.0 / 784, 1.0 / 784, 1.0 / 784,
            1.0 / 784, 1.0 / 784, 1.0 / 784, 1.0 / 196, 1.0 / 196, 1.0 / 196,
            1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 196,
            1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 196,
            1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 49, 1.0 / 49,
            1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49,
            1.0 / 49, 1.0
        ]
        mean = _get_gradients_mean()
        degree = _get_device_num()
        parameter_length = len(self.feature_map)
        self.grad_reducer_Amax = DistributedGradReducerThor(
            parameter_length, ((27, ), 2), mean, degree)
        self.grad_reducer_Gmax = DistributedGradReducerThor(
            parameter_length, ((27, ), 4), mean, degree)
        self.grad_reducer_A = DistributedGradReducerThor(
            parameter_length, ((27, ), 6), mean, degree)
        self.grad_reducer_G = DistributedGradReducerThor(
            parameter_length, ((27, ), 8), mean, degree)
        self.matrix_A_inv = ()
        self.matrix_G_inv = ()
        self.matrix_max_inv = ()

        for i in range(54):
            self.matrix_max_inv = self.matrix_max_inv + (Parameter(
                initializer(1, [1], mstype.float32),
                name="matrix_max" + str(i),
                requires_grad=False), )
        self.log = P.Log()
        self.exp = P.Exp()
        self.sqrt = P.Sqrt()
        self.matrix_max_inv = ParameterTuple(self.matrix_max_inv)
        self.assign = P.Assign()
        self.cast = P.Cast()
        self.thor = True
        self.weight_decay = weight_decay * loss_scale
        self.decay_flags = tuple(decay_filter(x) for x in self.parameters)
Code Example #6
    def __init__(self, seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
        super(Net, self).__init__()

        num_directions = 1
        if bidirectional:
            num_directions = 2

        self.lstm = P.LSTM(input_size, hidden_size, num_layers, has_bias, bidirectional, dropout)

        input_np = np.array([[[-0.5907, 1.0557, 1.7283, 0.6706, -1.2550, -0.5298, -0.2290, -0.6735, 0.8555, 1.4836],
                              [-1.7070, -0.5347, -0.9105, -0.2598, 0.0588, 1.5496, 1.0757, 0.3760, -1.2020, -0.2868]],

                             [[0.0151, 0.2126, 0.8090, -0.5292, -2.5590, 0.4279, -0.3081, -1.4706, -0.0498, 1.2301],
                              [0.4165, -0.5391, -0.0996, 0.1928, -0.4909, -0.1255, 0.4444, -1.3687, 1.3096, 0.6553]],

                             [[-0.7802, -0.2083, -0.6388, 1.3757, 0.4293, 0.5363, 0.3202, -0.6687, -1.3864, -0.2953],
                              [1.0799, -0.7204, 0.1130, -0.5857, -0.4855, -1.1068, 1.0126, 0.8716, 1.5460, -0.7392]],

                             [[2.2645, -0.6586, -0.2227, 1.4290, -0.5006, -1.6576, -0.1793, 0.5319, 0.1360, 0.2707],
                              [-0.4071, 0.1575, 1.4199, -0.9156, 0.1855, 0.4947, 1.0460, -0.6365, 0.1191, -0.6374]],

                             [[0.2468, 1.0815, -0.4893, 0.0664, 0.6405, -2.2967, 0.7612, 0.8759, 0.5685, -1.0999],
                              [-0.7272, -1.7750, -0.1164, -0.7159, 0.0061, -0.7839, -1.8329, 0.3434, -0.5634,
                               0.5384]]]).astype(np.float32)

        self.x = Parameter(initializer(Tensor(input_np), [seq_len, batch_size, input_size]), name='x')

        self.h = Parameter(initializer(
            Tensor(np.ones((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_layers * num_directions, batch_size, hidden_size]), name='h')

        self.c = Parameter(initializer(
            Tensor(np.ones((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_layers * num_directions, batch_size, hidden_size]), name='c')

        wih_l0 = np.array([[0.2300, 0.6668, 0.4703, 0.0425, 0.0464, 0.6825, 0.2249, -0.4315, -0.2449, 0.2964],
                           [-0.2811, -0.3444, 0.2557, -0.5137, -0.5518, 0.1652, -0.6720, 0.1066, 0.3586, 0.6299],
                           [0.5728, -0.1784, 0.5661, 0.4012, 0.3856, -0.1899, 0.3102, 0.3717, -0.5651, 0.1952],
                           [0.1026, -0.0527, 0.1198, -0.3080, 0.2292, 0.5757, -0.3567, -0.2731, -0.0586, -0.2849],
                           [0.2194, -0.1622, 0.3219, -0.3008, -0.3713, -0.3034, -0.2385, 0.0412, -0.5205, 0.0280],
                           [-0.5499, -0.0733, -0.5236, -0.6753, -0.7045, -0.1839, -0.1037, -0.5026, -0.4055, -0.3416],
                           [0.1573, -0.1301, -0.2882, -0.3464, 0.6643, 0.1980, -0.6804, 0.5359, 0.5996, 0.0124],
                           [-0.6436, 0.0587, -0.6520, -0.0471, 0.1667, 0.6042, 0.5752, -0.6296, -0.2976,
                            -0.3757]]).astype(np.float32).reshape([1, -1])

        whh_l0 = np.array([[0.3358, 0.2790],
                           [-0.5355, 0.0989],
                           [-0.1402, 0.5120],
                           [0.1335, 0.1653],
                           [0.3533, -0.3531],
                           [0.4166, -0.4420],
                           [-0.5454, -0.1720],
                           [0.0041, -0.0799]]).astype(np.float32).reshape([1, -1])

        bih_l0 = np.array([0.5518, 0.1083, 0.4829, 0.0607, -0.1770, -0.6944, 0.3059, 0.5354]).astype(
            np.float32).reshape([1, -1])
        bhh_l0 = np.array([0.5025, -0.1261, -0.5405, 0.3220, -0.3441, 0.6488, -0.0284, -0.2334]).astype(
            np.float32).reshape([1, -1])

        wih_reverse_l0 = np.array(
            [[-0.7048, -0.1768, 0.2288, -0.0760, -0.1319, 0.0820, -0.4132, 0.3644, 0.3919, 0.2449],
             [0.0551, -0.0530, -0.5883, 0.0799, -0.5025, 0.1500, -0.4067, -0.3764, -0.3018, 0.2467],
             [-0.2279, 0.3144, 0.5705, 0.4617, 0.1729, 0.6539, -0.2086, 0.5355, 0.4439, 0.0122],
             [0.6967, -0.5245, 0.3527, 0.3386, 0.0429, -0.3803, -0.4328, -0.4767, 0.4481, -0.2405],
             [0.6744, -0.2776, 0.0798, 0.1543, 0.6421, 0.6102, 0.3591, -0.4431, -0.6327, -0.0075],
             [-0.4520, 0.4201, -0.2374, -0.1556, -0.4175, -0.6834, 0.3096, -0.1581, 0.0127, 0.6872],
             [0.1788, -0.5442, -0.3675, -0.2887, -0.3004, 0.5813, 0.1618, 0.6875, -0.4678, 0.0071],
             [-0.6453, -0.2528, 0.5675, -0.5154, -0.4129, -0.0214, 0.5539, 0.0343, 0.1712, 0.5644]]).astype(
            np.float32).reshape([1, -1])

        whh_reverse_l0 = np.array([[-0.6657, 0.6330],
                                   [-0.2290, 0.6556],
                                   [0.4808, -0.2712],
                                   [0.0407, -0.2587],
                                   [0.3837, 0.0382],
                                   [0.2268, 0.1217],
                                   [-0.6404, -0.3336],
                                   [0.5461, -0.0764]]).astype(np.float32).reshape([1, -1])

        bih_reverse_l0 = np.array([0.0314, 0.1009, 0.3664, -0.6732, -0.6944, 0.5098, -0.1251, 0.2644]).astype(
            np.float32).reshape([1, -1])
        bhh_reverse_l0 = np.array([-0.1961, -0.3836, 0.1191, -0.7022, -0.0961, 0.5493, -0.6979, 0.0017]).astype(
            np.float32).reshape([1, -1])

        wih_l1 = np.array([[1.2746e-01, -3.3346e-01, 1.5589e-01, -4.7986e-01],
                           [6.5835e-01, 3.8135e-01, -3.8409e-01, -3.6499e-01],
                           [-6.0374e-04, -1.2227e-01, -1.5955e-01, 4.2772e-01],
                           [-1.8281e-01, -5.0484e-01, 7.0204e-01, 6.5872e-01],
                           [3.7765e-01, -4.3494e-01, 3.1503e-01, -4.2504e-02],
                           [6.3506e-01, -4.3049e-02, -5.7413e-01, -2.5134e-01],
                           [8.7181e-02, -5.5216e-01, 5.5436e-01, -3.9599e-01],
                           [4.4611e-01, -4.2690e-01, 6.6142e-01, 6.3882e-01]]).astype(np.float32).reshape([1, -1])

        whh_l1 = np.array([[-0.0049, -0.3267],
                           [0.0863, -0.6277],
                           [0.4815, -0.2236],
                           [0.5996, -0.3441],
                           [0.3959, -0.0249],
                           [0.3986, -0.0922],
                           [-0.5321, 0.0877],
                           [0.2811, -0.0483]]).astype(np.float32).reshape([1, -1])

        bih_l1 = np.array([0.0032, -0.0893, 0.5706, 0.3712, 0.0590, 0.0044, 0.2417, 0.1291]).astype(np.float32).reshape(
            [1, -1])
        bhh_l1 = np.array([-0.0704, 0.3908, -0.1121, 0.6970, -0.6216, 0.6340, -0.2945, 0.5224]).astype(
            np.float32).reshape([1, -1])

        wih_reverse_l1 = np.array([[-0.2693, 0.3487, 0.0692, 0.0047],
                                   [0.6187, 0.5649, 0.0680, 0.5110],
                                   [-0.5262, -0.3307, -0.3892, 0.5382],
                                   [-0.2925, 0.5185, -0.1385, 0.3431],
                                   [-0.3252, 0.3809, -0.4680, 0.3379],
                                   [0.4763, -0.5465, 0.0033, -0.5144],
                                   [0.3826, -0.3879, -0.2439, 0.2571],
                                   [-0.0422, -0.0359, -0.4197, -0.2209]]).astype(np.float32).reshape([1, -1])

        whh_reverse_l1 = np.array([[-0.4691, 0.5944],
                                   [-0.6885, 0.1708],
                                   [0.6391, -0.3690],
                                   [-0.5919, 0.1805],
                                   [-0.6853, -0.6215],
                                   [-0.4635, -0.6714],
                                   [-0.2050, 0.0513],
                                   [0.3411, -0.2833]]).astype(np.float32).reshape([1, -1])

        bih_reverse_l1 = np.array([0.5764, -0.7010, -0.0831, -0.3779, -0.2743, 0.0480, -0.2707, -0.5583]).astype(
            np.float32).reshape([1, -1])
        bhh_reverse_l1 = np.array([0.3379, -0.2671, -0.2789, -0.6611, -0.5542, -0.0188, 0.1831, 0.3612]).astype(
            np.float32).reshape([1, -1])

        '''
        weight
            layer0
                forward
                    wih
                    whh
                reverse
                    wih
                    whh
            layer1
                forward
                    wih
                    whh
                reverse
                    wih
                    whh
            ... ...
        bias:
            layer0
                forward
                    bih
                    bhh
                reverse
                    bih
                    bhh
            layer1
                forward
                    bih
                    bhh
                reverse
                    bih
                    bhh
            ... ...
        '''
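        # Pack all weight matrices first and all biases second, following the
        # layer/direction order sketched above; P.LSTM consumes one flat buffer.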
        w_np = np.concatenate(
            (wih_l0, whh_l0, wih_reverse_l0, whh_reverse_l0, wih_l1, whh_l1, wih_reverse_l1, whh_reverse_l1,
             bih_l0, bhh_l0, bih_reverse_l0, bhh_reverse_l0, bih_l1, bhh_l1, bih_reverse_l1, bhh_reverse_l1),
            axis=1).reshape([-1, 1, 1])

        self.w = Parameter(initializer(Tensor(w_np), w_np.shape), name='w')
Code Example #7
    def __init__(self, max_cycles=10):
        super(ForwardNetNoAssign, self).__init__()
        self.max_cycles = max_cycles
        self.i = Tensor(np.array(0), mstype.int32)
        self.zero = Tensor(np.array(0), mstype.int32)
        self.weight = Parameter(Tensor(np.array(0), mstype.int32))
Code Example #8
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 has_bias=False,
                 weight_init='normal',
                 bias_init='zeros'):
        Validator.check_value_type("kernel_size", kernel_size, [int],
                                   self.cls_name)
        Validator.check_value_type("stride", stride, [int], self.cls_name)
        Validator.check_value_type("padding", padding, [int], self.cls_name)
        Validator.check_value_type("dilation", dilation, [int], self.cls_name)
        Validator.check_integer('kernel_size', kernel_size, 1, Rel.GE,
                                self.cls_name)
        Validator.check_integer('stride', stride, 1, Rel.GE, self.cls_name)
        Validator.check_integer('padding', padding, 0, Rel.GE, self.cls_name)
        Validator.check_integer('dilation', dilation, 1, Rel.GE, self.cls_name)
        kernel_size = (1, kernel_size)
        stride = (1, stride)
        dilation = (1, dilation)
        get_shape = P.Shape()
        get_dtype = P.DType()
        if isinstance(weight_init, Tensor):
            weight_init_shape = get_shape(weight_init)
            Validator.check_integer('weight_init_shape',
                                    len(weight_init_shape), 3, Rel.EQ,
                                    self.cls_name)
            weight_init_dtype = get_dtype(weight_init)
            weight_init_value = weight_init.asnumpy()
            weight_init_value = np.expand_dims(weight_init_value, 2)
            weight_init = Tensor(weight_init_value, weight_init_dtype)
        # out_channels and in_channels swap,
        # because Conv2DBackpropInput's out_channel refers to Conv2D's out_channel,
        # and Conv1dTranspose's out_channel refers to Conv2DBackpropInput's in_channel.
        super(Conv1dTranspose, self).__init__(in_channels,
                                              out_channels,
                                              kernel_size,
                                              stride,
                                              pad_mode,
                                              padding,
                                              dilation,
                                              group,
                                              has_bias,
                                              weight_init,
                                              bias_init,
                                              transposed=True)
        self.padding = (0, 0, padding, padding)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.shape = P.Shape()
        if pad_mode not in ('valid', 'same', 'pad'):
            raise ValueError(
                'Attr \'pad_mode\' of \'Conv1dTranspose\' Op passed ' +
                str(pad_mode) +
                ', should be one of \'valid\', \'same\', \'pad\'.')
        self.is_valid = self.pad_mode == 'valid'
        self.is_same = self.pad_mode == 'same'
        self.is_pad = self.pad_mode == 'pad'
        if check_bool(has_bias):
            self.bias = Parameter(initializer(bias_init, [out_channels]),
                                  name='bias')

        # because Conv2DBackpropInput's out_channel refers to Conv2D's out_channel.
        self.conv2d_transpose = P.Conv2DBackpropInput(out_channel=in_channels,
                                                      kernel_size=kernel_size,
                                                      mode=1,
                                                      pad_mode=pad_mode,
                                                      pad=self.padding,
                                                      stride=stride,
                                                      dilation=dilation,
                                                      group=group)
        self.bias_add = P.BiasAdd()
        self.expand_dims = P.ExpandDims()
        self.squeeze = P.Squeeze(2)
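
A quick usage sketch for the layer above (argument values are illustrative):

    # Maps 3 input channels to 64 output channels with stride-2 upsampling.
    net = Conv1dTranspose(in_channels=3, out_channels=64, kernel_size=4,
                          stride=2, pad_mode='pad', padding=1)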
Code Example #9
    def __init__(self):
        super().__init__()
        self.param_a = Parameter(Tensor(5, mstype.int32), name='a')
        self.param_b = Parameter(Tensor(4, mstype.int32), name='b')
Code Example #10
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 pad_mode,
                 padding,
                 dilation,
                 group,
                 has_bias,
                 weight_init,
                 bias_init,
                 transposed=False):
        super(_Conv, self).__init__()
        self.in_channels = check_int_positive(in_channels)
        self.out_channels = check_int_positive(out_channels)
        self.kernel_size = kernel_size
        self.stride = stride
        self.pad_mode = pad_mode
        self.weight_init = weight_init
        self.bias_init = bias_init
        if isinstance(padding, int):
            Validator.check_integer('padding', padding, 0, Rel.GE,
                                    self.cls_name)
            self.padding = padding
        elif isinstance(padding, tuple):
            for pad in padding:
                Validator.check_integer('padding item', pad, 0, Rel.GE,
                                        self.cls_name)
            self.padding = padding
        else:
            raise TypeError(
                "padding type must be int/tuple(int) cannot be {}!".format(
                    type(padding)))

        self.dilation = dilation
        self.group = check_int_positive(group)
        self.has_bias = has_bias
        if (not isinstance(kernel_size[0], int)) or (not isinstance(kernel_size[1], int)) or \
                isinstance(kernel_size[0], bool) or isinstance(kernel_size[1], bool) or \
                kernel_size[0] < 1 or kernel_size[1] < 1:
            raise ValueError(
                "Attr 'kernel_size' of 'Conv2D' Op passed " +
                str(self.kernel_size) +
                ", should be an int or a tuple and equal to or greater than 1.")
        if (not isinstance(stride[0], int)) or (not isinstance(stride[1], int)) or \
                isinstance(stride[0], bool) or isinstance(stride[1], bool) or stride[0] < 1 or stride[1] < 1:
            raise ValueError(
                "Attr 'stride' of 'Conv2D' Op passed " + str(self.stride) +
                ", should be an int or a tuple and equal to or greater than 1.")
        if (not isinstance(dilation[0], int)) or (not isinstance(dilation[1], int)) or \
                isinstance(dilation[0], bool) or isinstance(dilation[1], bool) or dilation[0] < 1 or dilation[1] < 1:
            raise ValueError(
                "Attr 'dilation' of 'Conv2D' Op passed " + str(self.dilation) +
                ", should be an int or a tuple and equal to or greater than 1.")
        if in_channels % group != 0:
            raise ValueError(
                "Attr 'in_channels' of 'Conv2D' Op must be divisible by "
                "attr 'group' of 'Conv2D' Op.")
        if out_channels % group != 0:
            raise ValueError(
                "Attr 'out_channels' of 'Conv2D' Op must be divisible by "
                "attr 'group' of 'Conv2D' Op.")
        if transposed:
            shape = [in_channels, out_channels // group, *kernel_size]
        else:
            shape = [out_channels, in_channels // group, *kernel_size]
        self.weight = Parameter(initializer(self.weight_init, shape),
                                name='weight')

        if check_bool(has_bias):
            self.bias = Parameter(initializer(self.bias_init, [out_channels]),
                                  name='bias')
        else:
            if self.bias_init != 'zeros':
                logger.warning(
                    "Value of 'has_bias' is False, value of 'bias_init' will be ignored."
                )
            self.bias = None
Code Example #11
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 has_bias=False,
                 weight_init='normal',
                 bias_init='zeros'):
        kernel_size = twice(kernel_size)
        stride = twice(stride)
        dilation = twice(dilation)
        Validator.check_value_type('padding', padding, (int, tuple),
                                   self.cls_name)
        if isinstance(padding, tuple):
            Validator.check_integer('padding size', len(padding), 4, Rel.EQ,
                                    self.cls_name)
        # out_channels and in_channels swap,
        # because Conv2DBackpropInput's out_channel refers to Conv2D's out_channel,
        # and Conv2dTranspose's out_channel refers to Conv2DBackpropInput's in_channel.
        super(Conv2dTranspose, self).__init__(in_channels,
                                              out_channels,
                                              kernel_size,
                                              stride,
                                              pad_mode,
                                              padding,
                                              dilation,
                                              group,
                                              has_bias,
                                              weight_init,
                                              bias_init,
                                              transposed=True)

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.shape = P.Shape()
        if pad_mode not in ('valid', 'same', 'pad'):
            raise ValueError(
                'Attr \'pad_mode\' of \'Conv2dTranspose\' Op passed ' +
                str(pad_mode) +
                ', should be one of \'valid\', \'same\', \'pad\'.')
        self.is_valid = self.pad_mode == 'valid'
        self.is_same = self.pad_mode == 'same'
        self.is_pad = self.pad_mode == 'pad'
        if check_bool(has_bias):
            self.bias = Parameter(initializer(bias_init, [out_channels]),
                                  name='bias')

        # because Conv2DBackpropInput's out_channel refers to Conv2D's out_channel.
        self.conv2d_transpose = P.Conv2DBackpropInput(out_channel=in_channels,
                                                      kernel_size=kernel_size,
                                                      mode=1,
                                                      pad_mode=pad_mode,
                                                      pad=padding,
                                                      stride=stride,
                                                      dilation=dilation,
                                                      group=group)
        self.bias_add = P.BiasAdd()
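        # Normalize padding to (top, bottom, left, right), whether it was given
        # as a single int or as a 4-tuple.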
        if isinstance(self.padding, int):
            self.padding_top, self.padding_bottom, self.padding_left, self.padding_right = (
                self.padding, ) * 4
        else:
            self.padding_top, self.padding_bottom, self.padding_left, self.padding_right = self.padding
Code Example #12
File: lstm.py Project: xyg320/mindspore
    def __init__(self,
                 input_size,
                 hidden_size,
                 num_layers=1,
                 has_bias=True,
                 batch_first=False,
                 dropout=0,
                 bidirectional=False):
        super(LSTM, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.has_bias = has_bias
        self.batch_first = validator.check_value_type("batch_first",
                                                      batch_first, [bool],
                                                      self.cls_name)
        self.hidden_size = validator.check_integer("hidden_size", hidden_size,
                                                   0, Rel.GT, self.cls_name)
        self.num_layers = validator.check_integer("num_layers", num_layers, 0,
                                                  Rel.GT, self.cls_name)
        self.dropout = float(dropout)
        self.bidirectional = bidirectional
        if self.batch_first:
            self.transpose1 = P.Transpose()
            self.transpose2 = P.Transpose()
        num_directions = 2 if self.bidirectional else 1
        self.cpu_target = False
        if context.get_context("device_target") == "CPU":
            self.cpu_target = True
        if not self.cpu_target:
            self.lstm = P.LSTM(input_size=self.input_size,
                               hidden_size=self.hidden_size,
                               num_layers=self.num_layers,
                               has_bias=self.has_bias,
                               bidirectional=self.bidirectional,
                               dropout=self.dropout)
            weight_size = 0
            gate_size = 4 * self.hidden_size
            for layer in range(self.num_layers):
                input_layer_size = self.input_size if layer == 0 else self.hidden_size * num_directions
                increment_size = gate_size * input_layer_size
                increment_size += gate_size * self.hidden_size
                if self.has_bias:
                    increment_size += 2 * gate_size
                weight_size += increment_size * num_directions
            stdv = 1 / math.sqrt(hidden_size)
            w_np = np.random.uniform(-stdv, stdv,
                                     (weight_size, 1, 1)).astype(np.float32)
            self.weight = Parameter(initializer(Tensor(w_np),
                                                [weight_size, 1, 1]),
                                    name='weight')
        else:
            input_size_list = []
            input_size_list.append(self.input_size)
            for i in range(self.num_layers - 1):
                input_size_list.append(self.hidden_size * num_directions)
            weights = []
            layers = []
            bias_size = 0 if not self.has_bias else num_directions * self.hidden_size * 4
            stdv = 1 / math.sqrt(hidden_size)
            for i in range(num_layers):
                weight_size = (input_size_list[i] + self.hidden_size
                               ) * num_directions * self.hidden_size * 4
                if has_bias:
                    weight_size = weight_size + bias_size
                w_np = np.random.uniform(
                    -stdv, stdv, (weight_size, 1, 1)).astype(np.float32)
                weights.append(
                    Parameter(initializer(Tensor(w_np), w_np.shape),
                              name='weight' + str(i)))
                layers.append(
                    nn.LSTMCell(input_size=input_size_list[i],
                                hidden_size=self.hidden_size,
                                has_bias=self.has_bias,
                                bidirectional=self.bidirectional,
                                dropout=self.dropout))
            self.lstms = layers
            self.weight = ParameterTuple(tuple(weights))
        self.fill = P.Fill()
        self.shape = P.Shape()
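
A usage sketch for this layer (shapes follow the default seq_len-major layout; values are illustrative):

    lstm = LSTM(input_size=10, hidden_size=16, num_layers=2,
                has_bias=True, batch_first=False, bidirectional=True)
    # input: (seq_len, batch, input_size)
    # h0/c0: (num_layers * num_directions, batch, hidden_size)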
Code Example #13
def test_parameter():
    x = Parameter(initializer(1, [1], ms.float32), name="beta1_power")
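    # init_data() materializes the lazy initializer into an actual tensor value.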
    x = x.init_data()
    z = x / 2
    print(z)
Code Example #14
    def __init__(self,
                 num_features,
                 eps=1e-5,
                 momentum=0.9,
                 affine=True,
                 gamma_init='ones',
                 beta_init='zeros',
                 moving_mean_init='zeros',
                 moving_var_init='ones',
                 use_batch_statistics=None,
                 device_num_each_group=1,
                 input_dims='2d',
                 data_format='NCHW'):
        super(_BatchNorm, self).__init__()
        if num_features < 1:
            raise ValueError("num_features must be at least 1")

        if momentum < 0 or momentum > 1:
            raise ValueError(
                "momentum should be a number in range [0, 1], but got {}".
                format(momentum))
        self.format = validator.check_string(data_format, ['NCHW', 'NHWC'],
                                             'format', self.cls_name)
        if context.get_context(
                "device_target") != "GPU" and self.format == "NHWC":
            raise ValueError("NHWC format only support in GPU target.")
        self.use_batch_statistics = use_batch_statistics
        self.num_features = num_features
        self.eps = eps
        self.input_dims = input_dims
        self.moving_mean = Parameter(initializer(moving_mean_init,
                                                 num_features),
                                     name="mean",
                                     requires_grad=False)
        self.moving_variance = Parameter(initializer(moving_var_init,
                                                     num_features),
                                         name="variance",
                                         requires_grad=False)
        self.gamma = Parameter(initializer(gamma_init, num_features),
                               name="gamma",
                               requires_grad=affine)
        self.beta = Parameter(initializer(beta_init, num_features),
                              name="beta",
                              requires_grad=affine)
        self.group = validator.check_positive_int(device_num_each_group)
        self.is_global = False
        if self.group != 1:
            self.rank_id = get_rank()
            self.rank_size = get_group_size()
            self.device_list = [i for i in range(0, self.rank_size)]
            self.rank_list = self.list_group(self.device_list, self.group)
            self.rank_list_idx = len(self.rank_list)
            for i in range(self.rank_list_idx):
                if self.rank_id in self.rank_list[i] and self.group != 1:
                    self.is_global = True
                    management.create_group('group' + str(i),
                                            self.rank_list[i])
                    self.all_reduce = P.AllReduce(
                        P.ReduceOp.SUM,
                        'group' + str(i)).add_prim_attr('fusion', 1)
        self.shape = P.Shape()
        self.reduce_mean = P.ReduceMean(keep_dims=True)
        self.square = P.Square()
        self.sqrt = P.Sqrt()
        self.cast = P.Cast()
        self.dtype = P.DType()
        self.reshape = P.Reshape()
        self._target = context.get_context("device_target")
        self.is_graph_mode = context.get_context("mode") == context.GRAPH_MODE
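        # The BatchNorm primitives use the complementary momentum convention, so
        # a user-facing momentum of 0.9 becomes 0.1 internally.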
        self.momentum = 1.0 - momentum
        if context.get_context("enable_ge"):
            self.is_ge_backend = True
        else:
            self.is_ge_backend = False

        if self._target == "Ascend":
            self.bn_train = P.BatchNorm(is_training=True,
                                        epsilon=self.eps,
                                        momentum=self.momentum)
        if self._target == "GPU":
            self.bn_train = P.FusedBatchNormEx(mode=1,
                                               epsilon=self.eps,
                                               momentum=self.momentum,
                                               data_format=self.format)
        if self._target == "CPU":
            self.bn_train = P.FusedBatchNorm(mode=1,
                                             epsilon=self.eps,
                                             momentum=self.momentum)
        self.bn_infer = P.BatchNorm(is_training=False,
                                    epsilon=self.eps,
                                    data_format=self.format)
        self.enable_global_sync = self.is_global and (self.is_ge_backend or\
            (self.is_graph_mode and self._target == "Ascend"))

        data_parallel_strategy = ((1, ), (1, ))
        data_parallel_strategy_one = ((1, ), ())
        self.sub_mean = P.Sub().shard(data_parallel_strategy)
        self.sub_var = P.Sub().shard(data_parallel_strategy)
        self.mul_mean = P.Mul().shard(data_parallel_strategy_one)
        self.mul_var = P.Mul().shard(data_parallel_strategy_one)
        self.assign_sub_mean = P.AssignSub().shard(data_parallel_strategy)
        self.assign_sub_var = P.AssignSub().shard(data_parallel_strategy)
Code Example #15
    def __init__(self, seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
        super(LstmNet, self).__init__()

        num_directions = 1
        if bidirectional:
            num_directions = 2

        self.lstm = P.LSTM(input_size, hidden_size, num_layers, has_bias, bidirectional, dropout)

        input_np = np.array([[[0.6755, -1.6607, 0.1367, -0.9209, -1.7088, 0.3953, 2.7120, 0.1103, 0.1504, -0.3611],
                              [0.4276, -0.7850, -0.3758, 0.8604, -0.1361, -1.3618, -0.6251, -0.8391, 0.8142, 0.4068]],

                             [[-0.6424, -0.6095, 0.6639, -0.7253, 2.1190, -0.2840, 0.3858, 0.1691, 0.6764, 1.2903],
                              [0.7918, 0.4147, -0.5089, -0.3582, -1.4279, -0.7975, -0.0390, -0.4718, 0.4322, -0.7995]],

                             [[-1.5612, 0.0120, -0.7289, -1.2479, -0.6197, -0.6099, 0.9543, 0.4362, -1.3141, 0.4273],
                              [-0.6656, -0.6626, -0.5883, -0.6922, 0.5512, 1.7031, -1.2812, -0.2004, -0.9224, 0.4106]],

                             [[-0.9667, -0.6296, -0.7310, 1.2503, -0.1650, 1.2050, -0.1704, -0.5215, 0.1595, 0.3904],
                              [0.1026, -0.6821, -0.4387, -1.1637, -0.5000, 0.0590, 0.5219, -0.6835, 2.4406, 0.7135]],

                             [[-0.4710, 0.6558, -0.3144, -1.2213, 0.1556, -0.3836, -0.1081, -0.1440, -1.1231, 0.6279],
                              [-0.8449, -0.2184, -0.1806, -0.0615, -0.5660, -0.3556, 1.6891, -1.0286, 1.3361,
                               -0.4313]]]).astype(np.float32)

        self.x = Parameter(initializer(Tensor(input_np), [seq_len, batch_size, input_size]), name='x')

        self.h = Parameter(initializer(
            Tensor(np.ones((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_layers * num_directions, batch_size, hidden_size]), name='h')

        self.c = Parameter(initializer(
            Tensor(np.ones((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_layers * num_directions, batch_size, hidden_size]), name='c')

        wih = np.array([[3.4021e-01, -4.6622e-01, 4.5117e-01, 2.3627e-01, 3.7844e-01,
                         2.8770e-01, 4.1631e-01, -6.2628e-01, -4.8008e-01, -4.9148e-01],
                        [-6.4257e-02, -2.4807e-01, 1.3550e-02, 6.8946e-01, -1.2608e-02,
                         -7.1719e-02, -1.3566e-01, -4.9215e-01, 2.8509e-01, -6.3540e-01],
                        [-6.9863e-01, 5.9773e-01, -3.9062e-01, -7.6151e-02, 5.6803e-04,
                         -7.0420e-01, -6.1822e-01, 4.1854e-01, 4.0596e-01, 6.4867e-01],
                        [-3.0253e-01, -1.9464e-01, 7.0591e-01, 4.9368e-01, -5.9758e-01,
                         1.3251e-02, 3.5685e-01, -3.7640e-01, -4.4612e-01, 5.1794e-01],
                        [-3.2140e-01, 5.5578e-01, 6.3589e-01, -6.4249e-01, 5.7258e-01,
                         2.4256e-01, -2.7954e-01, 2.5202e-01, 2.9235e-01, -3.9979e-01],
                        [1.6547e-01, -7.9030e-02, -2.0045e-01, 6.2484e-01, -1.0727e-01,
                         -5.0010e-01, -2.9165e-01, -1.7620e-01, 1.5939e-01, -2.2744e-01],
                        [-4.0835e-01, 3.6751e-01, 4.7989e-01, 5.8886e-01, 5.3598e-01,
                         -2.9055e-01, -2.8129e-01, 6.0219e-01, 4.9193e-01, 3.3115e-01],
                        [-5.6894e-01, -5.0359e-01, 4.7491e-01, 5.8110e-01, -5.4921e-01,
                         -6.1343e-01, -5.8236e-02, -3.7682e-01, 4.8338e-01, -2.1551e-01]]).astype(np.float32).reshape(
            [1, -1])

        whh = np.array([[-0.4820, -0.2350],
                        [-0.1195, 0.0519],
                        [0.4511, -0.3961],
                        [-0.5962, 0.0906],
                        [0.2162, -0.1178],
                        [0.6237, 0.0711],
                        [0.1867, -0.1225],
                        [0.1831, 0.0850]]).astype(np.float32).reshape([1, -1])

        bih = np.array([-0.2862, 0.0034, 0.2059, -0.6544, 0.3244, -0.2472, 0.0852, -0.3050]).astype(np.float32).reshape(
            [1, -1])
        bhh = np.array([-0.6575, 0.1562, -0.6434, 0.0212, -0.2493, -0.5626, 0.1530, -0.5235]).astype(
            np.float32).reshape([1, -1])

        w_np = np.concatenate((wih, whh, bih, bhh), axis=1).reshape([-1, 1, 1])

        self.w = Parameter(initializer(Tensor(w_np), w_np.shape), name='w')
Code Example #16
    def __init__(self):
        super(Net_half, self).__init__()
        self.sparse_apply_ftrl = P.SparseApplyFtrl(lr=0.001, l1=0.0, l2=0.0, lr_power=-0.5, use_locking=False)
        self.var = Parameter(Tensor(np.ones([3, 3, 3]).astype(np.float16)), name="var")
        self.accum = Parameter(Tensor(np.ones([3, 3, 3]).astype(np.float16)), name="accum")
        self.linear = Parameter(Tensor(np.ones([3, 3, 3]).astype(np.float16)), name="linear")
Code Example #17
    def __init__(self, seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
        super(MultiLayerBiLstmNet, self).__init__()

        num_directions = 1
        if bidirectional:
            num_directions = 2

        self.lstm = P.LSTM(input_size, hidden_size, num_layers, has_bias, bidirectional, dropout)

        input_np = np.array([[[-0.1887, -0.4144, -0.0235, 0.7489, 0.7522, 0.5969, 0.3342, 1.2198, 0.6786, -0.9404],
                              [-0.8643, -1.6835, -2.4965, 2.8093, 0.1741, 0.2707, 0.7387, -0.0939, -1.7990, 0.4765]],

                             [[-0.5963, -1.2598, -0.7226, 1.1365, -1.7320, -0.7302, 0.1221, -0.2111, -1.6173, -0.0706],
                              [0.8964, 0.1737, -1.0077, -0.1389, 0.4889, 0.4391, 0.7911, 0.3614, -1.9533, -0.9936]],

                             [[0.3260, -1.3312, 0.0601, 1.0726, -1.6010, -1.8733, -1.5775, 1.1579, -0.8801, -0.5742],
                              [-2.2998, -0.6344, -0.5409, -0.9221, -0.6500, 0.1206, 1.5215, 0.7517, 1.3691, 2.0021]],

                             [[-0.1245, -0.3690, 2.1193, 1.3852, -0.1841, -0.8899, -0.3646, -0.8575, -0.3131, 0.2026],
                              [1.0218, -1.4331, 0.1744, 0.5442, -0.7808, 0.2527, 0.1566, 1.1484, -0.7766, -0.6747]],

                             [[-0.6752, 0.9906, -0.4973, 0.3471, -0.1202, -0.4213, 2.0213, 0.0441, 0.9016, 1.0365],
                              [1.2223, -1.3248, 0.1207, -0.8256, 0.1816, 0.7057, -0.3105, 0.5713, 0.2804,
                               -1.0685]]]).astype(np.float32)

        self.x = Parameter(initializer(Tensor(input_np), [seq_len, batch_size, input_size]), name='x')

        self.h = Parameter(initializer(
            Tensor(np.ones((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_layers * num_directions, batch_size, hidden_size]), name='h')

        self.c = Parameter(initializer(
            Tensor(np.ones((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_layers * num_directions, batch_size, hidden_size]), name='c')

        wih_l0 = np.array([[0.3715, -0.0723, 0.6017, 0.5115, -0.5357, 0.3794, -0.3752, -0.6205, -0.0370, -0.2904],
                           [0.7055, -0.4156, -0.3650, -0.0964, 0.4141, -0.2584, -0.4765, -0.0045, 0.2943, -0.2648],
                           [0.1355, 0.1697, 0.1883, 0.3754, 0.3744, -0.6128, 0.2328, -0.1275, 0.6604, 0.6498],
                           [-0.0266, 0.5805, -0.5358, -0.0929, 0.0797, 0.3744, 0.3299, -0.3825, 0.5804, -0.0855],
                           [0.1141, 0.2587, -0.4370, 0.6430, -0.0017, 0.4865, 0.2814, 0.6213, -0.6415, 0.4574],
                           [-0.3958, -0.5827, -0.1056, 0.6987, -0.6591, -0.1326, 0.5237, 0.4667, -0.7001, -0.2326],
                           [0.3074, -0.3118, -0.4591, 0.2481, -0.2978, -0.1850, 0.4770, -0.0126, 0.3655, -0.4306],
                           [0.3033, -0.6264, -0.6551, 0.0069, -0.5238, -0.3950, 0.5681, -0.4931, -0.6258,
                            0.4079]]).astype(np.float32).reshape([1, -1])

        whh_l0 = np.array([[-0.3870, 0.0238],
                           [-0.3758, 0.2490],
                           [0.5437, -0.4117],
                           [0.1181, -0.2043],
                           [-0.5335, 0.1188],
                           [-0.0822, 0.2154],
                           [0.5844, -0.3239],
                           [-0.6537, 0.0278]]).astype(np.float32).reshape([1, -1])

        bih_l0 = np.array([0.5440, 0.5995, 0.0155, -0.6254, 0.5114, 0.3364, -0.1824, -0.6262]).astype(
            np.float32).reshape([1, -1])
        bhh_l0 = np.array([0.4139, -0.2513, -0.4023, 0.4222, 0.6387, -0.6147, 0.0677, 0.5355]).astype(
            np.float32).reshape([1, -1])

        wih_reverse_l0 = np.array([[6.5219e-01, 5.6162e-01, -1.8653e-01, 6.8789e-01, 1.3240e-01, 1.7699e-01, 1.2940e-01,
                                    -1.8520e-01, -5.5439e-01, -3.4946e-01],
                                   [3.7645e-01, 6.5475e-01, 3.5964e-01, 2.2433e-01, -1.7869e-01, -2.9047e-01,
                                    1.7615e-01, -5.3353e-01, -7.4204e-02, -2.5270e-01],
                                   [5.8095e-01, -4.6426e-04, 1.9262e-01, -5.1306e-01, -3.6811e-01, 4.4858e-01,
                                    6.2580e-01, 9.5494e-02, -6.9505e-01, 4.9500e-01],
                                   [-3.7810e-01, 1.5485e-01, -1.4735e-01, -1.5327e-01, -4.5702e-01, 3.0816e-01,
                                    -3.4280e-01, 2.1604e-01, 1.4087e-01, -5.7707e-01],
                                   [-3.8700e-01, -6.4653e-01, 6.0653e-01, -4.7297e-01, 6.8413e-02, -1.2681e-01,
                                    6.8464e-02, 6.7011e-01, 3.9950e-01, -2.0577e-01],
                                   [-1.8648e-01, -6.7198e-01, 3.8017e-01, -3.3147e-01, 5.3193e-01, -5.4952e-01,
                                    2.1774e-01, -4.6271e-01, 3.2611e-01, 6.3554e-02],
                                   [-4.5403e-01, -1.5910e-01, -7.5886e-02, 2.6313e-01, 6.8093e-01, -3.9960e-01,
                                    5.5428e-01, 1.0429e-01, 5.1322e-01, 1.9406e-01],
                                   [3.9698e-01, -5.2101e-01, 5.1372e-01, -3.9866e-01, 1.0115e-01, -4.1290e-02,
                                    -3.0980e-01, 2.1607e-01, 4.8420e-01, -1.9267e-01]]).astype(np.float32).reshape(
            [1, -1])

        whh_reverse_l0 = np.array([[-0.3231, -0.3960],
                                   [-0.1625, -0.3032],
                                   [0.3892, -0.0666],
                                   [0.0159, -0.4870],
                                   [-0.4953, 0.2278],
                                   [-0.5380, -0.5250],
                                   [0.0371, -0.4534],
                                   [-0.5452, 0.5012]]).astype(np.float32).reshape([1, -1])

        bih_reverse_l0 = np.array([0.0469, -0.0107, 0.3783, -0.2657, -0.0089, 0.5032, -0.0757, -0.2022]).astype(
            np.float32).reshape([1, -1])
        bhh_reverse_l0 = np.array([-0.6584, 0.3977, 0.5597, -0.4784, 0.5360, -0.2532, 0.5362, -0.1063]).astype(
            np.float32).reshape([1, -1])

        wih_l1 = np.array([[0.0602, 0.6977, -0.3882, 0.3734],
                           [-0.6896, -0.6014, -0.2311, 0.6433],
                           [-0.6778, -0.5100, -0.1496, 0.5774],
                           [-0.5824, 0.4656, -0.2835, -0.5688],
                           [0.5623, 0.3599, 0.1731, 0.3124],
                           [0.1492, -0.6663, -0.1099, -0.5282],
                           [0.4696, -0.1795, -0.6712, -0.3903],
                           [0.4995, 0.0709, -0.1738, 0.2822]]).astype(np.float32).reshape([1, -1])

        whh_l1 = np.array([[0.3770, 0.4139],
                           [0.5351, 0.6394],
                           [0.3901, -0.1072],
                           [0.1106, 0.1331],
                           [0.3970, 0.4693],
                           [0.2958, -0.3813],
                           [-0.3064, 0.5519],
                           [-0.2827, 0.5844]]).astype(np.float32).reshape([1, -1])

        bih_l1 = np.array([0.5242, 0.5896, 0.3709, 0.6202, 0.5008, 0.2674, 0.4356, -0.3261]).astype(np.float32).reshape(
            [1, -1])
        bhh_l1 = np.array([-0.6648, 0.6680, 0.2510, -0.1245, -0.0524, 0.5439, -0.1650, 0.5303]).astype(
            np.float32).reshape([1, -1])

        wih_reverse_l1 = np.array([[0.6477, 0.4416, 0.3803, -0.4708],
                                   [0.4497, 0.2833, -0.4739, -0.6361],
                                   [-0.5573, -0.3867, -0.0349, -0.4128],
                                   [-0.1545, 0.3720, 0.2354, -0.6090],
                                   [0.5965, 0.6301, -0.4591, -0.0120],
                                   [-0.1253, -0.1881, -0.4388, 0.4335],
                                   [0.1944, -0.1230, -0.6170, 0.1043],
                                   [-0.6700, 0.4343, 0.6474, 0.0113]]).astype(np.float32).reshape([1, -1])

        whh_reverse_l1 = np.array([[0.6576, 0.5573],
                                   [0.2318, 0.0187],
                                   [-0.6365, 0.5744],
                                   [-0.6494, -0.1820],
                                   [0.6461, -0.3344],
                                   [0.0906, -0.5405],
                                   [-0.5999, 0.5571],
                                   [-0.0488, 0.5345]]).astype(np.float32).reshape([1, -1])

        bih_reverse_l1 = np.array([-0.6058, -0.2812, -0.4449, -0.0802, 0.4931, 0.4066, 0.5960, 0.1968]).astype(
            np.float32).reshape([1, -1])
        bhh_reverse_l1 = np.array([-0.2490, -0.3402, -0.5089, -0.3875, 0.4852, -0.0402, -0.0072, -0.1017]).astype(
            np.float32).reshape([1, -1])

        '''
        weight
            layer0
                forward
                    wih
                    whh
                reverse
                    wih
                    whh
            layer1
                forward
                    wih
                    whh
                reverse
                    wih
                    whh
            ... ...
        bias:
            layer0
                forward
                    bih
                    bhh
                reverse
                    bih
                    bhh
            layer1
                forward
                    bih
                    bhh
                reverse
                    bih
                    bhh
            ... ...
        '''
        w_np = np.concatenate(
            (wih_l0, whh_l0, wih_reverse_l0, whh_reverse_l0, wih_l1, whh_l1, wih_reverse_l1, whh_reverse_l1,
             bih_l0, bhh_l0, bih_reverse_l0, bhh_reverse_l0, bih_l1, bhh_l1, bih_reverse_l1, bhh_reverse_l1),
            axis=1).reshape([-1, 1, 1])

        self.w = Parameter(initializer(Tensor(w_np), w_np.shape), name='w')
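The layout comment above fixes the packing order: all weight matrices first, then all bias vectors, grouped by layer and direction. As a sanity check, here is a minimal sketch (pure NumPy, not part of the original test) that computes the flattened buffer size this packing should produce, assuming the standard 4-gate LSTM cell:

# Hypothetical helper for illustration only.
def lstm_flat_weight_size(input_size, hidden_size, num_layers,
                          bidirectional=True, has_bias=True):
    num_directions = 2 if bidirectional else 1
    gate_size = 4 * hidden_size  # input/forget/cell/output gates
    total = 0
    for layer in range(num_layers):
        # Layers above the first consume the concatenated directional outputs.
        layer_input = input_size if layer == 0 else hidden_size * num_directions
        # Per direction: W_ih is (gate_size, layer_input), W_hh is (gate_size, hidden_size).
        total += num_directions * gate_size * (layer_input + hidden_size)
        if has_bias:
            # Per direction: b_ih and b_hh, each of length gate_size.
            total += num_directions * 2 * gate_size
    return total

# For this example (input_size=10, hidden_size=2, num_layers=2, bidirectional):
assert lstm_flat_weight_size(10, 2, 2) == 352  # equals w_np.size above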
Code example #18
File: test_one_dev.py Project: zsangel378/mindspore
    def __init__(self):
        super(AllToAllNet, self).__init__()
        self.matmul = P.MatMul()
        self.matmul_weight = Parameter(Tensor(np.ones([128, 32]), dtype=ms.float32), name="weight")
        self.transpose1 = P.Transpose()
Code example #19
    def __init__(self, seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
        super(LstmNetWithDropout, self).__init__()

        num_directions = 1
        if bidirectional:
            num_directions = 2

        self.lstm = P.LSTM(input_size, hidden_size, num_layers, has_bias, bidirectional, dropout)

        input_np = np.array([[[-2.48789445e-01, -2.18991071e-01, -8.41492534e-01, -5.73351622e-01, 8.20644796e-02,
                               4.14313585e-01, -1.30143976e+00, -4.43366140e-01, -1.21003680e-01, -2.11284861e-01],
                              [9.94045794e-01, 3.18840504e-01, 4.81898338e-01, -4.83986028e-02, -9.26419497e-02,
                               -2.57977694e-01, 1.82191110e+00, 5.95121741e-01, 6.30752742e-01, -6.01903737e-01]],

                             [[7.67166913e-01, 5.41202351e-02, -1.24094069e+00, 1.38814664e+00, 2.05845284e+00,
                               7.29744852e-01, -1.12405574e+00, 3.78702253e-01, 2.28524983e-01, 2.02445173e+00],
                              [-1.85264975e-01, -4.55119252e-01, 1.23624969e+00, 1.24347043e+00, -1.68316591e+00,
                               -3.55918944e-01, 3.07149738e-01, -3.44966322e-01, -1.08978853e-01, 1.80912763e-01]],

                             [[-6.47622466e-01, 1.31204927e+00, 6.47477210e-01, -7.93370783e-01, 3.08402872e-04,
                               -5.12097359e-01, -1.69133916e-01, 8.57838035e-01, -3.63963723e-01, 6.35978997e-01],
                              [-3.92911851e-01, 8.27334300e-02, -1.11347124e-01, 8.79961967e-01, 6.02812059e-02,
                               -3.76448452e-01, -1.48800862e+00, -9.48699772e-01, -1.24202335e+00, 1.65264118e+00]],

                             [[4.05404866e-01, 5.67396320e-02, -2.05705926e-01, -8.70196745e-02, -7.34854519e-01,
                               -1.07580565e-01, 1.33716142e+00, -1.18140256e+00, 2.66074872e+00, -3.26788813e-01],
                              [6.97183967e-01, -2.32625628e+00, 1.20393467e+00, -2.32532692e+00, 2.03347206e+00,
                               -7.58083522e-01, 1.35564697e+00, -2.32149422e-01, 9.85125721e-01, 1.00944638e+00]],

                             [[9.89606023e-01, -5.30669808e-01, -2.66087383e-01, 8.14819038e-01, 1.07067376e-01,
                               -1.76214290e+00, -5.04977465e-01, 1.94490123e+00, 5.10450959e-01, -2.29238123e-01],
                              [-1.32928836e+00, -1.18175328e-01, -5.17818272e-01, -1.45089477e-01, 7.13987231e-01,
                               -7.41293788e-01, -3.67817104e-01, 1.18039274e+00, -6.03745162e-01,
                               -5.83392143e-01]]]).astype(np.float32)

        self.x = Parameter(initializer(Tensor(input_np), [seq_len, batch_size, input_size]), name='x')

        self.h = Parameter(initializer(
            Tensor(np.array([[[-0.47240502, 1.6824378],
                              [-0.00978304, 0.8179632]]]).astype(np.float32)),
            [num_layers * num_directions, batch_size, hidden_size]), name='h')

        self.c = Parameter(initializer(
            Tensor(np.array([[[-0.85975164, -0.3198615],
                              [-0.9821871, 0.26311848]]]).astype(np.float32)),
            [num_layers * num_directions, batch_size, hidden_size]), name='c')

        wih = np.array([[0.4473, -0.5509, -0.1585, -0.6215, 0.6228, 0.3462, 0.3015, -0.3714, 0.3119, -0.1151],
                        [-0.6923, 0.1373, 0.2214, 0.2280, 0.6960, -0.6368, 0.5725, -0.1359, 0.0742, -0.6777],
                        [-0.4432, 0.6162, -0.1066, -0.6138, -0.2529, -0.5638, -0.0603, 0.3039, 0.1068, -0.5300],
                        [0.4337, -0.1215, -0.5088, -0.0045, 0.2828, 0.1411, 0.0741, 0.6936, -0.4603, 0.6986],
                        [-0.2079, -0.5518, 0.5375, -0.2168, 0.3662, 0.0948, -0.0564, -0.1808, -0.6672, -0.2410],
                        [0.5142, 0.0790, -0.1123, -0.2351, 0.3982, -0.6351, 0.5906, 0.3917, -0.0850, -0.5397],
                        [-0.4795, -0.6576, 0.5693, 0.0047, -0.6626, 0.1013, -0.4015, -0.4040, -0.2817, 0.4430],
                        [0.0251, -0.3035, -0.6026, 0.2693, -0.2749, 0.1501, -0.5778, 0.5570, -0.7065, -0.6196]]).astype(
            np.float32).reshape([1, -1])

        whh = np.array([[-0.4344, -0.2529],
                        [0.0377, 0.7046],
                        [-0.0579, -0.5240],
                        [-0.4801, -0.1149],
                        [-0.4010, -0.5614],
                        [0.4721, 0.4366],
                        [-0.4282, 0.0816],
                        [0.1574, -0.3359]]).astype(np.float32).reshape([1, -1])

        bih = np.array([0.2431, 0.5967, -0.2417, -0.4169, -0.5326, 0.5685, -0.2971, -0.4326]).astype(
            np.float32).reshape([1, -1])
        bhh = np.array([-0.1751, -0.2270, -0.3980, -0.4983, -0.3527, -0.2774, 0.6371, -0.3330]).astype(
            np.float32).reshape([1, -1])

        w_np = np.concatenate((wih, whh, bih, bhh), axis=1).reshape([-1, 1, 1])

        self.w = Parameter(initializer(Tensor(w_np), w_np.shape), name='w')
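The same packing rule applies to this unidirectional single-layer case; a quick hedged check (assuming the 4-gate layout as before):

# wih (8x10) + whh (8x2) + bih (8) + bhh (8) flatten to
# 4*hidden_size*(input_size + hidden_size) + 2*4*hidden_size values.
assert w_np.size == 4 * 2 * (10 + 2) + 2 * 4 * 2  # 96 + 16 == 112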
Code example #20
    def __init__(self):
        super().__init__()
        self.max = P.ReduceMax()
        self.param = Parameter(Tensor(np.arange(2 * 2 * 2).reshape((2, 2, 2)), ms.float32), name="weight")
        self.zero = Tensor(np.zeros([2, 2, 2]), ms.float32)
Code example #21
File: optimizer.py Project: fangbaohui/mindspore
    def _init_group_params(self, parameters, learning_rate, weight_decay):
        """Init learning rate or weight decay in group params."""
        origin_dynamic_lr = self.dynamic_lr
        self._parse_group_params(parameters, learning_rate)
        if self.dynamic_lr and not origin_dynamic_lr:
            self.gather = P.GatherV2()
            self.assignadd = P.AssignAdd()
            self.global_step = Parameter(initializer(0, [1], mindspore.int32),
                                         name='global_step')

        params_store = []
        for group_param in parameters:
            if 'order_params' in group_param.keys():
                ordered_parameters = group_param['order_params']
                continue

            self.group_params += group_param['params']
            if 'lr' in group_param.keys():
                params_dynamic_lr = isinstance(group_param['lr'],
                                               (Iterable, Tensor))
                if self.dynamic_lr and not params_dynamic_lr:
                    lr = Tensor(
                        np.array([group_param['lr']] *
                                 self.dynamic_lr_length).astype(np.float32))
                else:
                    lr = self._get_single_lr(group_param['lr'])
            else:
                if self.dynamic_lr and not origin_dynamic_lr:
                    lr = Tensor(
                        np.array([self.scalar_lr] *
                                 self.dynamic_lr_length).astype(np.float32))
                else:
                    lr = learning_rate

            if 'weight_decay' in group_param.keys():
                validator.check_float_legal_value('weight_decay',
                                                  group_param['weight_decay'],
                                                  None)
                validator.check_number_range('weight_decay',
                                             group_param['weight_decay'], 0.0,
                                             float("inf"), Rel.INC_LEFT,
                                             self.cls_name)
                weight_decay_ = group_param['weight_decay'] * self.loss_scale
            else:
                weight_decay_ = weight_decay * self.loss_scale

            for key in group_param.keys():
                if key not in ('params', 'lr', 'weight_decay'):
                    logger.warning(
                        f"The optimizer cannot parse '{key}' when setting parameter groups."
                    )

            for param in group_param['params']:
                validator.check_value_type("parameter", param, [Parameter],
                                           self.cls_name)
                if param.name in params_store:
                    raise RuntimeError(
                        f"The {param.name} parameter has appeared in parameter groups."
                    )

                params_store.append(param.name)
                self.group_lr.append(Parameter(lr, name="lr_" + param.name))
                self.group_weight_decay.append(weight_decay_)

        if self.is_group_params_ordered:
            self._order_and_adjust_group_params(ordered_parameters)
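For reference, a hedged sketch of how parameter groups carrying the keys parsed above ('params', 'lr', 'weight_decay', 'order_params') are typically assembled; the net and the nn.Momentum subclass are illustrative assumptions, not taken from this file:

# Assumed setup: `net` is any Cell; nn.Momentum builds on this Optimizer.
# Keys other than the four recognized ones would trigger the warning above.
conv_params = [p for p in net.trainable_params() if 'conv' in p.name]
rest_params = [p for p in net.trainable_params() if 'conv' not in p.name]
group_params = [
    {'params': conv_params, 'weight_decay': 0.01},   # this group is decayed
    {'params': rest_params, 'lr': 0.1},              # group-specific lr
    {'order_params': net.trainable_params()},        # fixes parameter order
]
optimizer = nn.Momentum(group_params, learning_rate=0.05, momentum=0.9)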
Code example #22
    def __init__(self):
        super(Net, self).__init__()
        self.all_gather = P.AllGather(group=NCCL_WORLD_COMM_GROUP)
        self.x = Parameter(initializer(Tensor(x), x.shape), name='x')
Code example #23
    def __init__(self,
                 learning_rate,
                 parameters,
                 weight_decay=0.0,
                 loss_scale=1.0):
        super(Optimizer, self).__init__(auto_prefix=False)
        if parameters is not None and not isinstance(parameters, list):
            parameters = list(parameters)

        if not parameters:
            raise ValueError("Optimizer got an empty parameter list.")

        if not isinstance(parameters[0], (dict, Parameter)):
            raise TypeError(
                "Only a list of Parameter or dict can be supported.")

        if isinstance(loss_scale, int):
            loss_scale = float(loss_scale)
        validator.check_value_type("loss_scale", loss_scale, [float],
                                   self.cls_name)
        validator.check_positive_float(loss_scale, "loss_scale", self.cls_name)
        self.loss_scale = loss_scale

        weight_decay = self._preprocess_weight_decay(weight_decay)

        self._unique = True
        self._target = context.get_context("device_target")
        self.dynamic_lr = False
        self.assignadd = None
        self.global_step = None
        self.is_group = False
        self.is_group_lr = False
        self.is_group_params_ordered = False
        learning_rate = self._preprocess_single_lr(learning_rate)
        if isinstance(parameters[0], dict):
            self.is_group = True
            self.group_params = []
            self.group_lr = []
            self.group_weight_decay = []
            self._init_group_params(parameters, learning_rate, weight_decay)

        # The final value of dynamic_lr can only be determined after _preprocess_single_lr and _init_group_params have run
        if self.dynamic_lr:
            self.assignadd = P.AssignAdd()
            self.global_step = Parameter(initializer(0, [1], mindspore.int32),
                                         name='global_step')

        if self.is_group_lr:
            if self.dynamic_lr:
                self.learning_rate = CellList(self.group_lr)
            else:
                self.learning_rate = ParameterTuple(self.group_lr)
        else:
            self.learning_rate = self._build_single_lr(learning_rate,
                                                       'learning_rate')
        if self.is_group:
            self.parameters = ParameterTuple(self.group_params)
            self.weight_decay = tuple(self.group_weight_decay)
            self.weight_decay_tensor_tuple = tuple(
                Tensor(x, mstype.float32) for x in self.group_weight_decay)
            decay_filter = lambda x: x > 0
            self.decay_flags = tuple(
                decay_filter(x) for x in self.weight_decay)
            self.exec_weight_decay = any(self.decay_flags)
        else:
            self.parameters = ParameterTuple(parameters)
            self.weight_decay = weight_decay * loss_scale
            self.weight_decay_tensor = Tensor(self.weight_decay,
                                              mstype.float32)
            decay_filter = lambda x: 'beta' not in x.name and 'gamma' not in x.name
            self.decay_flags = tuple(decay_filter(x) for x in self.parameters)
            self.exec_weight_decay = self.weight_decay > 0
        # When a parameter has already been made unique, there is no need to do another unique in the optimizer.
        for param in self.parameters:
            if param.unique:
                self._unique = False
                break
        ps_filter = lambda x: x.is_param_ps
        self.ps_parameters = tuple(ps_filter(x) for x in self.parameters)
        cache_filter = lambda x: x.cache_enable
        self.cache_enable = tuple(cache_filter(x) for x in self.parameters)
        self.reciprocal_scale = Tensor(1.0 / loss_scale, mstype.float32)
        self.need_scale = loss_scale != 1.0
        self.global_step_increase_tensor = Tensor(1, mstype.int32)
        self.param_length = len(self.parameters)
        self.map_ = C.Map()
        if context.get_auto_parallel_context("enable_parallel_optimizer"):
            if _get_parallel_mode(
            ) == ParallelMode.DATA_PARALLEL and context.get_context(
                    "device_target") == "Ascend":
                self.use_parallel = True
            elif _get_parallel_mode() == ParallelMode.DATA_PARALLEL \
                    and context.get_context("device_target") != "Ascend":
                raise RuntimeError(
                    "Parallel optimizer only supports Ascend in data parallel mode."
                )
            elif _get_parallel_mode() in (ParallelMode.STAND_ALONE,
                                          ParallelMode.HYBRID_PARALLEL):
                raise RuntimeError(
                    "Parallel optimizer is not supported in {}.".format(
                        _get_parallel_mode()))
            else:
                self.use_parallel = False
        else:
            self.use_parallel = False
        if self.use_parallel:
            if self.cls_name not in ["Lamb", "AdamWeightDecay"]:
                raise RuntimeError(
                    "Parallel optimizer does not support optimizer {}".format(
                        self.cls_name))
            self.dev_num = _get_device_num()
            if self.dev_num > self.param_length:
                raise RuntimeError(
                    "Parallel optimizer can not be applied when the number of parameters {} is"
                    " less than the number of devices {}".format(
                        self.param_length, self.dev_num))
            self.param_rank = self._get_parameter_group_id()
            self.optim_filter = tuple(
                map(lambda x: x == _get_global_rank(), self.param_rank))
            self.param_names = []
            for param in self.parameters:
                self.param_names.append(param.name)

        else:
            self.optim_filter = (True, ) * self.param_length
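A short usage sketch of the two learning-rate paths this constructor distinguishes (nn.Momentum is again an assumed concrete subclass): a plain float leaves dynamic_lr False, while an iterable of per-step values switches the optimizer to a schedule driven by global_step:

static_opt = nn.Momentum(net.trainable_params(),
                         learning_rate=0.1,                # scalar -> static lr
                         momentum=0.9)
dynamic_opt = nn.Momentum(net.trainable_params(),
                          learning_rate=[0.1, 0.05, 0.01], # one value per step
                          momentum=0.9)                    # -> dynamic_lr=True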
Code example #24
    def __init__(self):
        super(Layer2, self).__init__()
        self.z2 = Parameter(
            Tensor(np.full([128, 96], 0.6, dtype=np.float32)), name='z2')
Code example #25
File: thor.py Project: huxian123/mindspore
    def __init__(self,
                 params,
                 learning_rate,
                 momentum,
                 matrix_A,
                 matrix_G,
                 A_inv_max,
                 G_inv_max,
                 weight_decay=0.0,
                 loss_scale=1.0,
                 use_nesterov=False,
                 decay_filter=lambda x: x.name not in []):
        super(THOR_GPU, self).__init__(learning_rate, params, weight_decay,
                                       loss_scale)
        validator.check_value_type("momentum", momentum, [float],
                                   self.cls_name)
        if isinstance(momentum, float) and momentum < 0.0:
            raise ValueError(
                "momentum should be at least 0.0, but got momentum {}".format(
                    momentum))
        self.momentum = Parameter(Tensor(momentum, mstype.float32),
                                  name="momentum")
        self.params = self.parameters
        self.use_nesterov = check_bool(use_nesterov)
        self.moments = self.params.clone(prefix="moments", init='zeros')
        self.hyper_map = C.HyperMap()
        self.opt = _selected_ops.ApplyMomentum(use_nesterov=self.use_nesterov)

        self.feature_map = [
            1.0 / 12544, 1.0 / 3136, 1.0 / 3136, 1.0 / 3136, 1.0 / 3136,
            1.0 / 3136, 1.0 / 3136, 1.0 / 3136, 1.0 / 3136, 1.0 / 3136,
            1.0 / 3136, 1.0 / 3136, 1.0 / 784, 1.0 / 784, 1.0 / 784, 1.0 / 784,
            1.0 / 784, 1.0 / 784, 1.0 / 784, 1.0 / 784, 1.0 / 784, 1.0 / 784,
            1.0 / 784, 1.0 / 784, 1.0 / 784, 1.0 / 196, 1.0 / 196, 1.0 / 196,
            1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 196,
            1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 196,
            1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 49, 1.0 / 49,
            1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49,
            1.0 / 49, 1.0
        ]
        self.feature_map_new = [x**0.5 for x in self.feature_map]
        self.transpose = P.Transpose()
        self.shape = P.Shape()
        self.reshape = P.Reshape()
        self.matmul = P.MatMul()
        self.matrix_A = ParameterTuple(matrix_A)
        self.matrix_G = ParameterTuple(matrix_G)
        self.A_inv_max = ParameterTuple(A_inv_max)
        self.G_inv_max = ParameterTuple(G_inv_max)
        self.assign = P.Assign()
        self.mul = P.Mul()

        mean = _get_gradients_mean()
        degree = _get_device_num()

        parameter_length = len(self.feature_map)
        self.grad_reducer_thorA = DistributedGradReducerThor(
            parameter_length, ((parameter_length, ), 0), mean, degree)
        self.grad_reducer_thorG = DistributedGradReducerThor(
            parameter_length, ((parameter_length, ), 0), mean, degree)
        self.weight_decay = weight_decay
        self.decay_flags = tuple(decay_filter(x) for x in self.parameters)
        self.update_gradient = P.UpdateThorGradient(split_dim=128)
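Note that the default decay_filter above, lambda x: x.name not in [], is always True, so every parameter receives weight decay unless the caller overrides it. A hedged sketch of a more selective filter, mirroring the 'beta'/'gamma' exclusion used by the base Optimizer:

# Illustrative override: skip BatchNorm scale/shift parameters.
decay_filter = lambda x: 'beta' not in x.name and 'gamma' not in x.name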
Code example #26
    def __init__(self):
        super(SwitchLayerCell, self).__init__()
        self.layers = (Layer1(), Layer2())
        self.z3 = Parameter(
            Tensor(np.full([128, 96], 0.6, dtype=np.float32)), name='z3')
Code example #27
def load_checkpoint(ckpt_file_name,
                    net=None,
                    strict_load=False,
                    filter_prefix=None):
    """
    Loads checkpoint info from a specified file.

    Args:
        ckpt_file_name (str): Checkpoint file name.
        net (Cell): Cell network. Default: None.
        strict_load (bool): Whether to load the parameters into the net strictly. If False, parameters
                           in the param_dict will be loaded into the net by matching the same suffix. Default: False.
        filter_prefix (Union[str, list[str], tuple[str]]): Parameters starting with the filter_prefix
            will not be loaded. Default: None.

    Returns:
        Dict, key is parameter name, value is a Parameter.

    Raises:
        ValueError: Checkpoint file is incorrect.

    Examples:
        >>> ckpt_file_name = "./checkpoint/LeNet5-2_1875.ckpt"
        >>> param_dict = load_checkpoint(ckpt_file_name, filter_prefix="conv1")
    """
    if not isinstance(ckpt_file_name, str):
        raise ValueError("The ckpt_file_name must be string.")

    if not os.path.exists(ckpt_file_name):
        raise ValueError("The checkpoint file is not exist.")

    if ckpt_file_name[-5:] != ".ckpt":
        raise ValueError("Please input the correct checkpoint file name.")

    if os.path.getsize(ckpt_file_name) == 0:
        raise ValueError(
            "The checkpoint file may be empty, please make sure enter the correct file name."
        )

    if filter_prefix is not None:
        if not isinstance(filter_prefix, (str, list, tuple)):
            raise TypeError(
                f"The type of filter_prefix must be str, list[str] or tuple[str] "
                f"when filter_prefix is not None, but got {str(type(filter_prefix))}."
            )
        if isinstance(filter_prefix, str):
            filter_prefix = (filter_prefix, )
        if not filter_prefix:
            raise ValueError(
                "The filter_prefix can't be empty when filter_prefix is list or tuple."
            )
        for index, prefix in enumerate(filter_prefix):
            if not isinstance(prefix, str):
                raise TypeError(
                    f"The type of filter_prefix must be str, list[str] or tuple[str], "
                    f"but got {str(type(prefix))} at index {index}.")

    logger.info("Execute load checkpoint process.")
    checkpoint_list = Checkpoint()

    try:
        with open(ckpt_file_name, "rb") as f:
            pb_content = f.read()
        checkpoint_list.ParseFromString(pb_content)
    except BaseException as e:
        logger.error(
            "Failed to read the checkpoint file `%s`, please check the correct of the file.",
            ckpt_file_name)
        raise ValueError(e.__str__())

    parameter_dict = {}
    try:
        param_data_list = []
        for element_id, element in enumerate(checkpoint_list.value):
            if filter_prefix is not None and _check_param_prefix(
                    filter_prefix, element.tag):
                continue
            data = element.tensor.tensor_content
            data_type = element.tensor.tensor_type
            np_type = tensor_to_np_type[data_type]
            ms_type = tensor_to_ms_type[data_type]
            element_data = np.frombuffer(data, np_type)
            param_data_list.append(element_data)
            if (element_id == len(checkpoint_list.value) - 1) or \
                    (element.tag != checkpoint_list.value[element_id + 1].tag):
                param_data = np.concatenate(param_data_list, axis=0)
                param_data_list.clear()
                dims = element.tensor.dims

                if dims == [0]:
                    if 'Float' in data_type:
                        param_data = float(param_data[0])
                    elif 'Int' in data_type:
                        param_data = int(param_data[0])
                    parameter_dict[element.tag] = Parameter(Tensor(
                        param_data, ms_type),
                                                            name=element.tag)
                elif dims == [1]:
                    parameter_dict[element.tag] = Parameter(Tensor(
                        param_data, ms_type),
                                                            name=element.tag)
                else:
                    param_dim = []
                    for dim in dims:
                        param_dim.append(dim)
                    param_value = param_data.reshape(param_dim)
                    parameter_dict[element.tag] = Parameter(Tensor(
                        param_value, ms_type),
                                                            name=element.tag)

        logger.info("Load checkpoint process finish.")

    except BaseException as e:
        logger.error("Failed to load the checkpoint file `%s`.",
                     ckpt_file_name)
        raise RuntimeError(e.__str__())

    if not parameter_dict:
        raise ValueError(
            f"The loaded parameter dict is empty after filtering, please check filter_prefix."
        )

    if net is not None:
        load_param_into_net(net, parameter_dict, strict_load)

    return parameter_dict
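A usage sketch for load_checkpoint (the checkpoint path and net are hypothetical; the keyword arguments follow the signature defined above):

# Load everything except optimizer state into an existing net.
param_dict = load_checkpoint("./checkpoint/LeNet5-2_1875.ckpt",
                             net=net,            # also runs load_param_into_net
                             strict_load=False,  # allow suffix-based matching
                             filter_prefix=["moments", "global_step"])
for name in param_dict:
    print(name)  # each value is a Parameter wrapping the stored tensor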
Code example #28
    def __init__(self, seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
        super(BiLstmNet, self).__init__()

        num_directions = 1
        if bidirectional:
            num_directions = 2

        self.lstm = P.LSTM(input_size, hidden_size, num_layers, has_bias, bidirectional, dropout)

        input_np = np.array([[[-1.7322, 1.6642, -1.1861, 0.2955, -0.7907, 0.2982, -1.3413, 1.0665, -0.0436, -0.1883],
                              [0.2195, 0.5917, -0.6739, 0.2388, -0.5364, -1.3309, -0.6018, -0.3081, -0.9648, -1.1627]],

                             [[-0.5094, -2.6025, -0.9302, -1.1937, 0.6501, -0.1903, -0.0661, 0.1080, 0.9829, -0.2280],
                              [1.3961, 0.2239, -0.1947, -0.3206, 0.5791, 0.3396, 0.1728, -1.2007, -1.0994, -1.3278]],

                             [[0.1870, -1.1090, -0.9705, 0.2207, 0.3743, 0.1158, -0.5443, -0.5559, 0.1538, -0.3975],
                              [-0.2347, -0.1245, -0.2335, 0.3164, 1.0997, -0.3928, -1.8517, 1.1136, -1.5051, -0.0071]],

                             [[1.2739, 2.5438, -0.4289, -0.7981, -1.3682, -2.2509, 0.2028, 1.3410, 2.9502, -1.1650],
                              [0.1254, 0.2726, 0.0251, 0.9323, 0.7315, 0.8231, -0.2123, -0.6885, 0.9893, -0.2047]],

                             [[0.1870, -0.9066, 0.7155, 0.5438, -0.9757, -0.5828, -0.3417, 1.5681, 1.0326, -0.0179],
                              [-0.7746, -1.0695, -0.5278, 2.5307, -0.1002, -1.5773, 0.7717, 1.0266, -0.0798,
                               1.2333]]]).astype(np.float32)

        self.x = Parameter(initializer(Tensor(input_np), [seq_len, batch_size, input_size]), name='x')

        self.h = Parameter(initializer(
            Tensor(np.ones((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_layers * num_directions, batch_size, hidden_size]), name='h')

        self.c = Parameter(initializer(
            Tensor(np.ones((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_layers * num_directions, batch_size, hidden_size]), name='c')

        wih = np.array([[-0.2959, -0.1142, 0.3662, 0.5406, 0.1738, 0.2697, -0.6960, -0.0464, 0.3486, 0.1888],
                        [0.3043, 0.1505, -0.1207, -0.2456, 0.2735, 0.6673, -0.3352, -0.6153, -0.5731, -0.2726],
                        [-0.2657, -0.5570, 0.6785, -0.1861, -0.0652, 0.5757, 0.6442, -0.4068, -0.3260, 0.7054],
                        [0.6607, 0.6927, -0.1354, 0.2484, 0.2053, 0.5743, -0.0212, 0.3340, -0.5685, -0.5668],
                        [0.6701, -0.3013, -0.1202, -0.4200, -0.4280, -0.6329, -0.6074, -0.4997, -0.6215, -0.6259],
                        [0.0299, -0.6071, -0.4683, -0.3363, -0.0044, -0.0007, 0.2700, 0.0202, -0.2880, -0.6869],
                        [0.3025, -0.2461, -0.5128, 0.6327, -0.1438, -0.5100, 0.1924, 0.2023, 0.3129, 0.2271],
                        [0.3777, 0.0546, 0.4790, -0.1895, 0.3588, 0.4490, 0.6850, 0.6240, -0.2739, -0.4474]]).astype(
            np.float32).reshape([1, -1])

        whh = np.array([[0.6346, -0.6366],
                        [-0.0248, -0.6156],
                        [-0.3821, 0.6327],
                        [-0.6132, -0.5071],
                        [0.4029, 0.0906],
                        [-0.5671, 0.2556],
                        [0.0268, -0.4347],
                        [0.1152, -0.3124]]).astype(np.float32).reshape([1, -1])

        bih = np.array([-0.3839, -0.5365, -0.6691, 0.1697, -0.1564, -0.0451, -0.5921, -0.5367]).astype(
            np.float32).reshape([1, -1])
        bhh = np.array([0.5952, -0.4905, 0.0423, -0.0293, -0.6638, 0.4348, -0.4291, -0.5541]).astype(
            np.float32).reshape([1, -1])

        wih_reverse = np.array([[-0.2938, 0.0048, 0.2704, -0.3387, -0.4529, -0.2586, 0.1352, -0.1208, -0.1423, -0.0220],
                                [-0.3701, 0.0201, -0.0255, 0.1340, -0.1938, -0.7056, -0.2303, 0.4814, 0.3636, -0.5018],
                                [-0.0284, -0.0108, -0.5788, 0.2389, 0.2604, 0.6774, -0.5525, 0.6265, -0.6126, 0.3197],
                                [-0.6906, 0.6991, -0.6138, 0.0044, 0.5714, 0.4176, 0.5451, -0.5114, -0.2286, 0.1105],
                                [0.3547, 0.6233, -0.4543, -0.6799, 0.1109, 0.5601, 0.0212, 0.6926, 0.0597, -0.4383],
                                [-0.1370, -0.5852, 0.0596, 0.5494, 0.5789, -0.0534, 0.1092, 0.3544, -0.1571, 0.4444],
                                [-0.5886, -0.4765, -0.3837, -0.6634, 0.0963, -0.1385, -0.0837, -0.1354, 0.0547,
                                 -0.2870],
                                [0.2049, -0.7057, -0.1736, 0.4724, 0.1957, -0.3037, 0.4626, -0.6465, 0.4575,
                                 0.4230]]).astype(np.float32).reshape([1, -1])

        whh_reverse = np.array([[0.2339, -0.0307],
                                [-0.5850, 0.6328],
                                [0.5856, -0.5601],
                                [0.4875, -0.6929],
                                [0.0314, 0.2531],
                                [-0.2523, 0.3244],
                                [0.5199, 0.5146],
                                [0.3968, 0.4511]]).astype(np.float32).reshape([1, -1])

        bih_reverse = np.array([-0.1760, 0.2828, 0.2450, -0.4016, -0.4664, 0.4031, -0.1945, -0.1509]).astype(
            np.float32).reshape([1, -1])
        bhh_reverse = np.array([0.6427, 0.4806, 0.6278, 0.1596, 0.0038, -0.3418, 0.0549, -0.3900]).astype(
            np.float32).reshape([1, -1])

        w_np = np.concatenate((wih, whh, wih_reverse, whh_reverse, bih, bhh, bih_reverse, bhh_reverse), axis=1).reshape(
            [-1, 1, 1])

        self.w = Parameter(initializer(Tensor(w_np), w_np.shape), name='w')
Code example #29
    def __init__(self, input_x):
        super(ScatterSubNet, self).__init__()
        self.input_x = Parameter(input_x, name="para")
        self.scatter_sub = P.ScatterSub()
Code example #30
File: optimizer.py Project: wenming2014/mindspore
    def _init_group_params(self, parameters, learning_rate, weight_decay):
        """Init learning rate or weight decay in group params."""
        origin_dynamic_lr = self.dynamic_lr
        if self.dynamic_lr:
            dynamic_lr_length = learning_rate.size()
        else:
            dynamic_lr_length = 0

        for group_param in parameters:
            lr_length = dynamic_lr_length
            if 'lr' in group_param.keys():
                self._get_single_lr(group_param['lr'])
                if isinstance(group_param['lr'], Iterable):
                    lr_length = len(group_param['lr'])
                    self.dynamic_lr = True
                elif isinstance(group_param['lr'], Tensor):
                    lr_length = group_param['lr'].size()
                    self.dynamic_lr = True
            if dynamic_lr_length not in (lr_length, 0):
                raise ValueError(
                    "The dynamic learning rates in all groups must have the same size."
                )
            dynamic_lr_length = lr_length

        if self.dynamic_lr and not origin_dynamic_lr:
            self.gather = P.GatherV2()
            self.assignadd = P.AssignAdd()
            self.global_step = Parameter(initializer(0, [1], mindspore.int32),
                                         name='global_step')

        params_store = []
        for group_param in parameters:
            self.params += group_param['params']
            if 'lr' in group_param.keys():
                params_dynamic_lr = isinstance(group_param['lr'],
                                               (Iterable, Tensor))

                if self.dynamic_lr and not params_dynamic_lr:
                    lr = Tensor(
                        np.array([group_param['lr']] *
                                 dynamic_lr_length).astype(np.float32))
                else:
                    lr = self._get_single_lr(group_param['lr'])
            else:
                if self.dynamic_lr and not origin_dynamic_lr:
                    lr = Tensor(
                        np.array([self.scalar_lr] * dynamic_lr_length).astype(
                            np.float32))
                else:
                    lr = learning_rate

            if 'weight_decay' in group_param.keys():
                validator.check_float_legal_value('weight_decay',
                                                  group_param['weight_decay'],
                                                  None)
                validator.check_number_range('weight_decay',
                                             group_param['weight_decay'], 0.0,
                                             float("inf"), Rel.INC_LEFT,
                                             self.cls_name)
                weight_decay_ = group_param['weight_decay'] * self.loss_scale
            else:
                weight_decay_ = weight_decay * self.loss_scale

            for param in group_param['params']:
                if param in params_store:
                    raise RuntimeError(
                        f"The {param.name} parameter has appeared in parameter groups."
                    )
                params_store.append(param)
                self.group_lr.append(Parameter(lr, name="lr_" + param.name))
                self.group_weight_decay.append(weight_decay_)