Example #1
    def __init__(self,
                 in_channels,
                 out_channels,
                 weight_init='normal',
                 bias_init='zeros',
                 has_bias=True,
                 activation=None):
        super(Dense, self).__init__()
        self.in_channels = Validator.check_positive_int(in_channels)
        self.out_channels = Validator.check_positive_int(out_channels)
        self.has_bias = Validator.check_bool(has_bias)
        self.reshape = P.Reshape()
        self.shape_op = P.Shape()


        if isinstance(weight_init, Tensor):
            if weight_init.ndim != 2 or weight_init.shape[0] != out_channels or \
                    weight_init.shape[1] != in_channels:
                raise ValueError("Weight init shape error.")
        self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight")

        self.bias = None
        if self.has_bias:
            if isinstance(bias_init, Tensor):
                if bias_init.ndim != 1 or bias_init.shape[0] != out_channels:
                    raise ValueError("Bias init shape error.")
            self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias")
            self.bias_add = P.BiasAdd()

        self.matmul = P.MatMul(transpose_b=True)
        self.activation = get_activation(activation) if isinstance(activation, str) else activation
        if activation is not None and not isinstance(self.activation, (Cell, Primitive)):
            raise TypeError("The activation must be str or Cell or Primitive,"" but got {}.".format(activation))
        self.activation_flag = self.activation is not None
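
A minimal usage sketch for a layer built by this constructor (an assumption: the snippet above is the mindspore.nn.Dense cell and MindSpore is installed):

import numpy as np
from mindspore import Tensor, nn

# in_channels and out_channels must be positive ints, otherwise check_positive_int raises.
dense = nn.Dense(in_channels=3, out_channels=4, activation='relu')
x = Tensor(np.ones((2, 3)).astype(np.float32))
print(dense(x).shape)  # expected: (2, 4)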
Example #2
    def __init__(self,
                 input_size,
                 hidden_size,
                 num_layers=1,
                 has_bias=True,
                 batch_first=False,
                 dropout=0,
                 bidirectional=False):
        super(LSTM, self).__init__()
        validator.check_value_type("batch_first", batch_first, [bool], self.cls_name)
        validator.check_positive_int(hidden_size, "hidden_size", self.cls_name)
        validator.check_positive_int(num_layers, "num_layers", self.cls_name)

        self.batch_first = batch_first
        self.transpose = P.Transpose()
        self.lstm = P.LSTM(input_size=input_size,
                           hidden_size=hidden_size,
                           num_layers=num_layers,
                           has_bias=has_bias,
                           bidirectional=bidirectional,
                           dropout=float(dropout))

        weight_size = 0
        gate_size = 4 * hidden_size
        num_directions = 2 if bidirectional else 1
        for layer in range(num_layers):
            input_layer_size = input_size if layer == 0 else hidden_size * num_directions
            increment_size = gate_size * input_layer_size
            increment_size += gate_size * hidden_size
            if has_bias:
                increment_size += 2 * gate_size
            weight_size += increment_size * num_directions
        stdv = 1 / math.sqrt(hidden_size)
        w_np = np.random.uniform(-stdv, stdv, (weight_size, 1, 1)).astype(np.float32)
        self.weight = Parameter(initializer(Tensor(w_np), [weight_size, 1, 1]), name='weight')
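
The weight-size formula in the loop above can be checked by hand; a sketch with assumed values input_size=10, hidden_size=16, num_layers=1, bidirectional=True, has_bias=True:

gate_size = 4 * 16                 # 64
increment_size = gate_size * 10    # input-to-hidden weights: 640
increment_size += gate_size * 16   # hidden-to-hidden weights: 1024
increment_size += 2 * gate_size    # input-hidden and hidden-hidden biases: 128
weight_size = increment_size * 2   # two directions: 3584
print(weight_size)                 # 3584, the length of the flat weight Parameter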
Example #3
 def __init__(self, model, train_dataset, task_type, num_classes=None, epochs=1,
              epi_uncer_model_path=None, ale_uncer_model_path=None, save_model=False):
     self.epi_model = model
     self.ale_model = deepcopy(model)
     self.epi_train_dataset = train_dataset
     self.ale_train_dataset = train_dataset
     self.task_type = task_type
     self.epochs = Validator.check_positive_int(epochs)
     self.epi_uncer_model_path = epi_uncer_model_path
     self.ale_uncer_model_path = ale_uncer_model_path
     self.save_model = Validator.check_bool(save_model)
     self.epi_uncer_model = None
     self.ale_uncer_model = None
     self.concat = P.Concat(axis=0)
     self.sum = P.ReduceSum()
     self.pow = P.Pow()
     if not isinstance(model, Cell):
         raise TypeError('The model should be Cell type.')
     if task_type not in ('regression', 'classification'):
         raise ValueError('The task should be regression or classification.')
     if task_type == 'classification':
         self.num_classes = Validator.check_positive_int(num_classes)
     else:
         self.num_classes = num_classes
     if save_model:
         if epi_uncer_model_path is None or ale_uncer_model_path is None:
             raise ValueError("If save_model is True, the epi_uncer_model_path and "
                              "ale_uncer_model_path should not be None.")
Example #4
def avg_pooling(x, pool_h, pool_w, stride):
    """
    Applies average pooling over an input array.

    Args:
        x (numpy.ndarray): The input array to be average pooled.
        pool_h (int): Height of the pooling window.
        pool_w (int): Width of the pooling window.
        stride (int): The stride of the sliding window.

    Returns:
        numpy.ndarray, the output array after applying average pooling on the input array.
    """
    validator.check_positive_int(stride, "stride")
    num, channel, height, width = x.shape
    out_h = (height - pool_h) // stride + 1
    out_w = (width - pool_w) // stride + 1

    col = im2col(x, pool_h, pool_w, stride)
    col = col.reshape(-1, pool_h * pool_w)

    out = np.mean(col, axis=1)
    out = out.reshape((num, out_h, out_w, channel)).transpose(0, 3, 1, 2)

    return out
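
Because `im2col` is not shown here, the expected behaviour can be illustrated with a small, self-contained NumPy sketch that averages each 2x2 window directly (independent of the helper above):

import numpy as np

x = np.arange(16, dtype=np.float32).reshape(1, 1, 4, 4)
pool_h = pool_w = stride = 2
out = np.zeros((1, 1, 2, 2), dtype=np.float32)
for i in range(2):
    for j in range(2):
        window = x[0, 0, i * stride:i * stride + pool_h, j * stride:j * stride + pool_w]
        out[0, 0, i, j] = window.mean()
print(out[0, 0])  # [[ 2.5  4.5]
                  #  [10.5 12.5]]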
Example #5
    def __init__(self,
                 in_channels,
                 out_channels,
                 weight_init='normal',
                 bias_init='zeros',
                 damping=0.03,
                 loss_scale=1,
                 frequency=278,
                 batch_size=32,
                 has_bias=True,
                 activation=None):
        super(Dense_Thor_GPU, self).__init__()
        self.in_channels = Validator.check_positive_int(in_channels)
        self.out_channels = Validator.check_positive_int(out_channels)
        self.has_bias = Validator.check_bool(has_bias)
        self.thor = True
        if isinstance(weight_init, Tensor):
            if weight_init.ndim != 2 or weight_init.shape[0] != out_channels or \
                    weight_init.shape[1] != in_channels:
                raise ValueError("weight_init shape error")

        self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]))

        if self.has_bias:
            if isinstance(bias_init, Tensor):
                if bias_init.ndim != 1 or bias_init.shape[0] != out_channels:
                    raise ValueError("bias_init shape error")

            self.bias = Parameter(initializer(bias_init, [out_channels]))

        self.matmul = P.MatMul(transpose_b=True)
        self.bias_add = P.BiasAdd()

        self.activation = get_activation(activation)
        self.activation_flag = self.activation is not None
        split_dim = 128
        matrix_A_shape, matrix_G_shape = caculate_matmul_shape(self.in_channels, self.out_channels, split_dim)
        self.matrix_A_inv = Parameter(Tensor(np.zeros(matrix_A_shape).astype(np.float32)), requires_grad=False)
        self.matrix_G_inv = Parameter(Tensor(np.zeros(matrix_G_shape).astype(np.float32)), requires_grad=False)
        self.broadcast_to = P.BroadcastTo(matrix_A_shape)
        self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
        self.shape = P.Shape()
        self.reshape = P.Reshape()
        self.transpose = P.Transpose()
        self.mul = P.Mul()
        self.cube_matmul = P.MatMul(transpose_a=True)
        self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
        self.batch_size = Tensor(batch_size, mstype.float16)
        self.getG = P.InsertGradientOf(self.save_gradient)
        self.damping = Parameter(Tensor(damping), requires_grad=False)
        self.dampingA = Tensor(np.identity(in_channels), mstype.float32)
        self.dampingG = Tensor(np.identity(out_channels), mstype.float32)
        self.cast = P.Cast()
        self.gather = P.Gather()
        self.freq = Tensor(frequency, mstype.int32)
        self.axis = 0
        self.add = P.Add()
        self.sqrt = P.Sqrt()
        self.cholesky = P.CholeskyTrsm(split_dim=split_dim)
        self.vector_matmul = P.BatchMatMul(transpose_a=True)
Example #6
    def __init__(self,
                 in_channels,
                 out_channels,
                 weight_init='normal',
                 bias_init='zeros',
                 has_bias=True,
                 activation=None):
        super(Dense, self).__init__()
        self.in_channels = Validator.check_positive_int(in_channels)
        self.out_channels = Validator.check_positive_int(out_channels)
        self.has_bias = Validator.check_bool(has_bias)

        if isinstance(weight_init, Tensor):
            if weight_init.dim() != 2 or weight_init.shape[0] != out_channels or \
               weight_init.shape[1] != in_channels:
                raise ValueError("Weight init shape error.")
        self.weight = Parameter(initializer(weight_init,
                                            [out_channels, in_channels]),
                                name="weight")

        self.bias = None
        if self.has_bias:
            if isinstance(bias_init, Tensor):
                if bias_init.dim() != 1 or bias_init.shape[0] != out_channels:
                    raise ValueError("Bias init shape error.")
            self.bias = Parameter(initializer(bias_init, [out_channels]),
                                  name="bias")
            self.bias_add = P.BiasAdd()

        self.matmul = P.MatMul(transpose_b=True)
        self.activation = get_activation(activation)
        self.activation_flag = self.activation is not None
Example #7
    def __init__(self,
                 num_groups,
                 num_channels,
                 eps=1e-05,
                 affine=True,
                 gamma_init='ones',
                 beta_init='zeros'):
        super(GroupNorm, self).__init__()
        self.num_groups = validator.check_positive_int(num_groups)
        self.num_channels = validator.check_positive_int(num_channels)
        if num_channels % num_groups != 0:
            raise ValueError("num_channels should be divided by num_groups")
        self.eps = validator.check_value_type('eps', eps, (float, ),
                                              type(self).__name__)
        self.affine = validator.check_bool(affine)

        gamma = initializer(gamma_init, num_channels)
        beta = initializer(beta_init, num_channels)
        if self.affine:
            self.gamma = Parameter(gamma, name='gamma')
            self.beta = Parameter(beta, name='beta')
        else:
            self.gamma = gamma
            self.beta = beta
        self.shape = F.shape
        self.reshape = F.reshape
        self.reduce_mean = P.ReduceMean(keep_dims=True)
        self.square = F.square
        self.reduce_sum = P.ReduceSum(keep_dims=True)
        self.sqrt = P.Sqrt()
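
A minimal usage sketch (an assumption: the snippet above is the mindspore.nn.GroupNorm cell; num_channels must be divisible by num_groups):

import numpy as np
from mindspore import Tensor, nn

group_norm = nn.GroupNorm(num_groups=2, num_channels=4)
x = Tensor(np.ones((1, 4, 8, 8)).astype(np.float32))
print(group_norm(x).shape)  # expected: (1, 4, 8, 8), normalized over 2 groups of 2 channels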
Example #8
    def __init__(self,
                 in_channels,
                 out_channels,
                 weight_init='normal',
                 bias_init='zeros',
                 has_bias=True):
        super(GNNFeatureTransform, self).__init__()
        self.in_channels = Validator.check_positive_int(in_channels)
        self.out_channels = Validator.check_positive_int(out_channels)
        self.has_bias = Validator.check_bool(has_bias)

        if isinstance(weight_init, Tensor):
            if weight_init.ndim != 2 or weight_init.shape[0] != out_channels or \
                    weight_init.shape[1] != in_channels:
                raise ValueError("weight_init shape error")

        self.weight = Parameter(initializer(weight_init,
                                            [out_channels, in_channels]),
                                name="weight")

        if self.has_bias:
            if isinstance(bias_init, Tensor):
                if bias_init.ndim != 1 or bias_init.shape[0] != out_channels:
                    raise ValueError("bias_init shape error")

            self.bias = Parameter(initializer(bias_init, [out_channels]),
                                  name="bias")

        self.matmul = P.MatMul(transpose_b=True)
        self.bias_add = P.BiasAdd()
Example #9
 def __init__(self,
              features,
              biases,
              ftr_dims,
              num_class,
              num_nodes,
              hidden_units,
              num_heads,
              attn_drop=0.0,
              ftr_drop=0.0,
              activation=nn.ELU(),
              residual=False):
     super(GAT, self).__init__()
     self.features = Tensor(features)
     self.biases = Tensor(biases)
     self.ftr_dims = Validator.check_positive_int(ftr_dims)
     self.num_class = Validator.check_positive_int(num_class)
     self.num_nodes = Validator.check_positive_int(num_nodes)
     self.hidden_units = hidden_units
     self.num_heads = num_heads
     self.attn_drop = attn_drop
     self.ftr_drop = ftr_drop
     self.activation = activation
     self.residual = Validator.check_bool(residual)
     self.layers = []
     # first layer
     self.layers.append(
         AttentionAggregator(self.ftr_dims,
                             self.hidden_units[0],
                             self.num_heads[0],
                             self.ftr_drop,
                             self.attn_drop,
                             self.activation,
                             residual=False))
     # intermediate layer
     for i in range(1, len(self.hidden_units)):
         self.layers.append(
             AttentionAggregator(self.hidden_units[i - 1] *
                                 self.num_heads[i - 1],
                                 self.hidden_units[i],
                                 self.num_heads[i],
                                 self.ftr_drop,
                                 self.attn_drop,
                                 self.activation,
                                 residual=self.residual))
     # output layer
     self.layers.append(
         AttentionAggregator(self.hidden_units[-1] * self.num_heads[-2],
                             self.num_class,
                             self.num_heads[-1],
                             self.ftr_drop,
                             self.attn_drop,
                             activation=None,
                             residual=False,
                             output_transform='sum'))
     self.layers = nn.layer.CellList(self.layers)
Example #10
    def set_grad_accumulation_step(self, grad_accumulation_step):
        """
        Set grad accumulation step.

        Args:
            grad_accumulation_step (int): The grad accumulation step.
        """
        self.check_context_handle()
        Validator.check_positive_int(grad_accumulation_step)
        self._context_handle.set_grad_accumulation_step(grad_accumulation_step)
Example #11
 def __init__(self,
              vocab_size,
              embedding_size,
              param_init='normal',
              target='CPU',
              slice_mode='batch_slice',
              manual_shapes=None):
     super(EmbeddingLookup, self).__init__()
     self.target = target
     if target not in ('CPU', 'DEVICE'):
         raise ValueError(
             'Attr \'target\' of \'EmbeddingLookup\' Op passed ' +
             str(target) +
             ', should be one of values in \'CPU\', \'DEVICE\'.')
     self.gatherv2 = P.GatherV2()
     self.embeddinglookup = P.EmbeddingLookup().add_prim_attr(
         'primitive_target', 'CPU')
     self.embedding_table = Parameter(initializer(
         param_init, [vocab_size, embedding_size]),
                                      name='embedding_table')
     parallel_mode = _get_parallel_mode()
     is_auto_parallel = parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL,
                                          ParallelMode.AUTO_PARALLEL)
     if slice_mode == "field_slice" and is_auto_parallel:
         if not manual_shapes:
             raise ValueError(
                 "in slice field mode, the manual_shapes should not be none"
             )
         if not isinstance(manual_shapes, tuple):
             raise TypeError(
                 "manual_shapes type must be tuple(int) cannot be {}!".
                 format(type(manual_shapes)))
         for dim in manual_shapes:
             validator.check_positive_int(dim, 'manual shape dim',
                                          self.cls_name)
         self.gatherv2.add_prim_attr("manual_split", manual_shapes)
         self.embeddinglookup.add_prim_attr("manual_split", manual_shapes)
         self.gatherv2.shard(((get_group_size(), 1), (1, get_group_size())))
         self.embeddinglookup.shard(
             ((get_group_size(), 1), (1, get_group_size())))
     elif slice_mode == "table_row_slice" and is_auto_parallel:
         self.gatherv2.shard(((get_group_size(), 1), (1, 1)))
         self.embeddinglookup.shard(((get_group_size(), 1), (1, 1)))
     elif slice_mode == "table_column_slice" and is_auto_parallel:
         self.gatherv2.shard(((1, get_group_size()), (1, 1)))
         self.embeddinglookup.shard(((1, get_group_size()), (1, 1)))
     elif slice_mode == "batch_slice" and is_auto_parallel:
         self.gatherv2.shard(((1, 1), (get_group_size(), 1)))
         self.embeddinglookup.shard(((1, 1), (get_group_size(), 1)))
     else:
         if is_auto_parallel:
             raise ValueError(
                 "slice_mode should support mode in nn.EmbeddingLookup, but get "
                 + str(slice_mode))
Example #12
def max_pool_with_argmax(x, pool_h, pool_w, stride):
    """Max pooling with argmax."""
    validator.check_positive_int(stride, "stride")
    num, channel, height, width = x.shape
    out_h = (height - pool_h) // stride + 1
    out_w = (width - pool_w) // stride + 1
    col = im2col(x, pool_h, pool_w, stride)
    col = col.reshape(-1, pool_h * pool_w)
    out = np.max(col, axis=1)
    out_argmax = np.argmax(col, axis=1)
    out = out.reshape((num, out_h, out_w, channel)).transpose(0, 3, 1, 2)
    out_argmax = out_argmax.reshape(
        (num, out_h, out_w, channel)).transpose(0, 3, 1, 2)
    return out, out_argmax
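
As with the average-pooling helper, a self-contained NumPy sketch (independent of `im2col`) shows the expected result: `out` holds the window maxima and `out_argmax` the row-major index of each maximum inside its pool_h * pool_w window.

import numpy as np

x = np.arange(16, dtype=np.float32).reshape(1, 1, 4, 4)
# Split the 4x4 map into 2x2 windows with stride 2 and flatten each window.
windows = x.reshape(1, 1, 2, 2, 2, 2).transpose(0, 1, 2, 4, 3, 5).reshape(1, 1, 2, 2, 4)
print(windows.max(axis=-1)[0, 0])     # [[ 5.  7.]  [13. 15.]]
print(windows.argmax(axis=-1)[0, 0])  # [[3 3]  [3 3]] -> each max sits at window index 3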
Example #13
 def __init__(self,
              num_features,
              eps=1e-5,
              momentum=0.9,
              affine=True,
              gamma_init='ones',
              beta_init='zeros',
              moving_mean_init='zeros',
              moving_var_init='ones',
              use_batch_statistics=None,
              device_num_each_group=2):
     super(GlobalBatchNorm, self).__init__(num_features,
                                           eps,
                                           momentum,
                                           affine,
                                           gamma_init,
                                           beta_init,
                                           moving_mean_init,
                                           moving_var_init,
                                           use_batch_statistics,
                                           device_num_each_group,
                                           input_dims='both')
     self.group = validator.check_positive_int(device_num_each_group)
     if self.group <= 1:
         raise ValueError("the number of group must be greater than 1.")
Example #14
    def run(self, train_dataset, epochs=10):
        """
        Optimize the parameters by training the probability network, and return the trained network.

        Args:
            train_dataset (Dataset): A training dataset iterator.
            epochs (int): Total number of iterations on the data. Default: 10.

        Returns:
            Cell, the trained probability network.
        """
        epochs = Validator.check_positive_int(epochs)
        train_net = TrainOneStepCell(self.net_with_loss, self.optimizer)
        train_net.set_train()
        for _ in range(1, epochs + 1):
            train_loss = 0
            dataset_size = 0
            for data in train_dataset.create_dict_iterator(num_epochs=1):
                x = Tensor(data['image'], dtype=mstype.float32)
                y = Tensor(data['label'], dtype=mstype.int32)
                dataset_size += len(x)
                loss = train_net(x, y).asnumpy()
                train_loss += loss
            self._loss = train_loss / dataset_size
        model = self.net_with_loss.backbone_network
        return model
Example #15
def piecewise_constant_lr(milestone, learning_rates):
    r"""
    Get piecewise constant learning rate.

    Calculate learning rate by given `milestone` and `learning_rates`. Let the value of `milestone` be
    :math:`(M_1, M_2, ..., M_N)` and the value of `learning_rates` be :math:`(x_1, x_2, ..., x_N)`. N is the length of
    `milestone`. Let the output learning rate be `y`.

    .. math::
        y[i] = x_t,\ for\ i \in [M_{t-1}, M_t)

    Args:
        milestone (Union[list[int], tuple[int]]): A list of milestones. This list is monotonically increasing.
            Every element is a milestone step, and must be greater than 0.
        learning_rates (Union[list[float], tuple[float]]): A list of learning rates.

    Returns:
        list[float]. The size of list is :math:`M_N`.

    Examples:
        >>> milestone = [2, 5, 10]
        >>> learning_rates = [0.1, 0.05, 0.01]
        >>> piecewise_constant_lr(milestone, learning_rates)
        [0.1, 0.1, 0.05, 0.05, 0.05, 0.01, 0.01, 0.01, 0.01, 0.01]
    """
    validator.check_value_type('milestone', milestone, (tuple, list), None)
    validator.check_value_type('learning_rates', learning_rates, (tuple, list),
                               None)
    if len(milestone) != len(learning_rates):
        raise ValueError(
            'The size of `milestone` must be the same as the size of `learning_rates`.'
        )

    lr = []
    last_item = 0
    for i, item in enumerate(milestone):
        validator.check_positive_int(item, f'milestone[{i}]')
        validator.check_float_legal_value(f'learning_rates[{i}]',
                                          learning_rates[i], None)
        if item < last_item:
            raise ValueError(
                f'The value of milestone[{i}] must be greater than milestone[{i - 1}]'
            )
        lr += [learning_rates[i]] * (item - last_item)
        last_item = item

    return lr
Example #16
    def __init__(self, channel=1, w=0.25):
        super(PReLU, self).__init__()
        validator.check_positive_int(channel, 'channel', self.cls_name)
        if isinstance(w, (np.float32, float)):
            tmp = np.empty((channel,), dtype=np.float32)
            tmp.fill(w)
            w = Tensor(tmp)
        elif isinstance(w, list):
            w = Tensor(w)

        if not isinstance(w, Tensor):
            raise TypeError("w only support np.float32, float, list or Tensor type.")

        self.w = Parameter(initializer(w, [channel]), name='a')
        self.prelu = P.PReLU()
        self.relu = P.ReLU()
        self.assign = P.Assign()
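
A minimal usage sketch (an assumption: this is the mindspore.nn.PReLU cell, where w initializes the learnable negative-slope parameter for each channel):

import numpy as np
from mindspore import Tensor, nn

prelu = nn.PReLU(channel=1, w=0.25)
x = Tensor(np.array([[[[-4.0, 2.0]]]]).astype(np.float32))  # shape (1, 1, 1, 2)
print(prelu(x))  # expected values: [[[[-1.  2.]]]] (negative inputs scaled by w=0.25)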
Example #17
 def __init__(self, encoder, decoder, hidden_size, latent_size):
     super(VAE, self).__init__()
     self.encoder = encoder
     self.decoder = decoder
     if (not isinstance(encoder, Cell)) or (not isinstance(decoder, Cell)):
         raise TypeError('The encoder and decoder should be Cell type.')
     self.hidden_size = Validator.check_positive_int(hidden_size)
     self.latent_size = Validator.check_positive_int(latent_size)
     if hidden_size < latent_size:
         raise ValueError('The latent_size should be less than or equal to the hidden_size.')
     self.normal = C.normal
     self.exp = P.Exp()
     self.reshape = P.Reshape()
     self.shape = P.Shape()
     self.to_tensor = P.ScalarToArray()
     self.dense1 = Dense(self.hidden_size, self.latent_size)
     self.dense2 = Dense(self.hidden_size, self.latent_size)
     self.dense3 = Dense(self.latent_size, self.hidden_size)
Example #18
    def __init__(self,
                 in_channel,
                 out_channel,
                 in_drop_ratio=0.0,
                 coef_drop_ratio=0.0,
                 residual=False,
                 coef_activation=nn.LeakyReLU(),
                 activation=nn.ELU()):
        super(AttentionHead, self).__init__()
        self.in_channel = Validator.check_positive_int(in_channel)
        self.out_channel = Validator.check_positive_int(out_channel)
        self.in_drop_ratio = in_drop_ratio
        self.in_drop = nn.Dropout(keep_prob=1 - in_drop_ratio)
        self.in_drop_2 = nn.Dropout(keep_prob=1 - in_drop_ratio)
        self.feature_transform = GNNFeatureTransform(
            in_channels=self.in_channel,
            out_channels=self.out_channel,
            has_bias=False,
            weight_init='XavierUniform')

        self.f_1_transform = GNNFeatureTransform(in_channels=self.out_channel,
                                                 out_channels=1,
                                                 weight_init='XavierUniform')
        self.f_2_transform = GNNFeatureTransform(in_channels=self.out_channel,
                                                 out_channels=1,
                                                 weight_init='XavierUniform')
        self.softmax = nn.Softmax()

        self.coef_drop = nn.Dropout(keep_prob=1 - coef_drop_ratio)
        self.matmul = P.MatMul()
        self.bias_add = P.BiasAdd()
        self.bias = Parameter(initializer('zeros', self.out_channel),
                              name='bias')
        self.residual = Validator.check_bool(residual)
        if self.residual:
            if in_channel != out_channel:
                self.residual_transform_flag = True
                self.residual_transform = GNNFeatureTransform(
                    in_channels=self.in_channel, out_channels=self.out_channel)
            else:
                self.residual_transform = None
        self.coef_activation = coef_activation
        self.activation = activation
Example #19
    def _train(self,
               epoch,
               train_dataset,
               callbacks=None,
               dataset_sink_mode=True):
        """
        Training.

        Args:
            epoch (int): Total number of iterations on the data.
            train_dataset (Dataset): A training dataset iterator. If there is no
                                     loss_fn, a tuple with multiple data items (data1, data2, data3, ...) will
                                     be returned and passed to the network. Otherwise, a tuple (data, label)
                                     will be returned, and the data and label are passed to the network and
                                     loss function respectively.
            callbacks (list): List of callback objects which should be executed while training. Default: None.
            dataset_sink_mode (bool): Determines whether to pass the data through the dataset channel.
                                      Default: True. In PyNative mode, dataset sink is not supported, so
                                      training falls back to the non-sink process.
        """
        epoch = Validator.check_positive_int(epoch)
        self._train_network.set_train()

        if self._parameter_broadcast:
            self._train_network.set_broadcast_flag()

        # build callback list
        cb_params = _InternalCallbackParam()
        cb_params.train_network = self._train_network
        cb_params.epoch_num = epoch
        cb_params.batch_num = train_dataset.get_dataset_size()
        cb_params.mode = "train"
        cb_params.loss_fn = self._loss_fn
        cb_params.optimizer = self._optimizer
        cb_params.parallel_mode = self._parallel_mode
        cb_params.device_number = self._device_number
        cb_params.train_dataset = train_dataset
        cb_params.list_callback = callbacks

        with _CallbackManager(callbacks) as list_callback:
            if not dataset_sink_mode:
                self._train_process(epoch, train_dataset, list_callback,
                                    cb_params)
            elif context.get_context("mode") == context.PYNATIVE_MODE:
                logger.warning(
                    "The pynative mode cannot support dataset sink mode currently."
                    "So the training process will be performed with dataset not sink."
                )
                self._train_process(epoch, train_dataset, list_callback,
                                    cb_params)
            else:
                self._train_dataset_sink_process(epoch, train_dataset,
                                                 list_callback, cb_params)
Example #20
 def __init__(self, encoder, decoder, hidden_size, latent_size, num_classes):
     super(ConditionalVAE, self).__init__()
     self.encoder = encoder
     self.decoder = decoder
     if (not isinstance(encoder, Cell)) or (not isinstance(decoder, Cell)):
         raise TypeError('The encoder and decoder should be Cell type.')
     self.hidden_size = Validator.check_positive_int(hidden_size)
     self.latent_size = Validator.check_positive_int(latent_size)
     if hidden_size < latent_size:
         raise ValueError('The latent_size should be less than or equal to the hidden_size.')
     self.num_classes = Validator.check_positive_int(num_classes)
     self.normal = C.normal
     self.exp = P.Exp()
     self.reshape = P.Reshape()
     self.shape = P.Shape()
     self.concat = P.Concat(axis=1)
     self.to_tensor = P.ScalarToArray()
     self.one_hot = OneHot(depth=num_classes)
     self.dense1 = Dense(self.hidden_size, self.latent_size)
     self.dense2 = Dense(self.hidden_size, self.latent_size)
     self.dense3 = Dense(self.latent_size + self.num_classes, self.hidden_size)
Example #21
def _check_inputs(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair):
    validator.check_positive_int(total_step, 'total_step')
    validator.check_positive_int(step_per_epoch, 'step_per_epoch')
    validator.check_positive_int(decay_epoch, 'decay_epoch')
    validator.check_positive_float(learning_rate, 'learning_rate')
    validator.check_is_float(learning_rate, 'learning_rate')
    validator.check_positive_float(decay_rate, 'decay_rate')
    validator.check_is_float(decay_rate, 'decay_rate')
    validator.check_value_type('is_stair', is_stair, [bool])
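
This validator backs the exponential-style decay schedules; a hedged usage sketch, assuming it sits behind mindspore.nn.exponential_decay_lr with the same argument set:

from mindspore import nn

lr = nn.exponential_decay_lr(learning_rate=0.1, decay_rate=0.9, total_step=6,
                             step_per_epoch=2, decay_epoch=1, is_stair=True)
print(lr)  # expected (up to float rounding): [0.1, 0.1, 0.09, 0.09, 0.081, 0.081]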
Example #22
    def generate_sample(self, generate_nums, shape):
        """
        Randomly sample from the latent space to generate samples.

        Args:
            generate_nums (int): The number of samples to generate.
            shape (tuple): The shape of the samples; it must be (generate_nums, C, H, W) or (-1, C, H, W).

        Returns:
            Tensor, the generated samples.
        """
        generate_nums = Validator.check_positive_int(generate_nums)
        if not isinstance(shape, tuple) or len(shape) != 4 or (shape[0] != -1 and shape[0] != generate_nums):
            raise ValueError('The shape should be (generate_nums, C, H, W) or (-1, C, H, W).')
        sample_z = self.normal((generate_nums, self.latent_size), self.to_tensor(0.0), self.to_tensor(1.0), seed=0)
        sample = self._decode(sample_z)
        sample = self.reshape(sample, shape)
        return sample
Example #23
def cosine_decay_lr(min_lr, max_lr, total_step, step_per_epoch, decay_epoch):
    r"""
    Calculate learning rate based on the cosine decay function.

    For the i-th step, the formula of computing decayed_learning_rate[i] is:

    .. math::
        decayed\_learning\_rate[i] = min\_learning\_rate + 0.5 * (max\_learning\_rate - min\_learning\_rate) *
        (1 + cos(\frac{current\_epoch}{decay\_epoch}\pi))

    Where :math:`current\_epoch=floor(\frac{i}{step\_per\_epoch})`.

    Args:
        min_lr (float): The minimum value of learning rate.
        max_lr (float): The maximum value of learning rate.
        total_step (int): The total number of steps.
        step_per_epoch (int): The number of steps per epoch.
        decay_epoch (int): A value used to calculate decayed learning rate.

    Returns:
        list[float]. The size of list is `total_step`.

    Examples:
        >>> min_lr = 0.01
        >>> max_lr = 0.1
        >>> total_step = 6
        >>> step_per_epoch = 2
        >>> decay_epoch = 2
        >>> output = cosine_decay_lr(min_lr, max_lr, total_step, step_per_epoch, decay_epoch)
        >>> print(output)
        [0.1, 0.1, 0.05500000000000001, 0.05500000000000001, 0.01, 0.01]
    """
    if not isinstance(min_lr, float):
        raise TypeError("min_lr must be float.")
    validator.check_non_negative_float(min_lr, "min_lr", None)
    validator.check_positive_float(max_lr, 'max_lr')
    validator.check_is_float(max_lr, 'max_lr')
    validator.check_positive_int(total_step, 'total_step')
    validator.check_positive_int(step_per_epoch, 'step_per_epoch')
    validator.check_positive_int(decay_epoch, 'decay_epoch')
    if min_lr >= max_lr:
        raise ValueError('`max_lr` should be greater than `min_lr`.')

    delta = 0.5 * (max_lr - min_lr)
    lr = []
    for i in range(total_step):
        tmp_epoch = min(math.floor(i / step_per_epoch), decay_epoch)
        lr.append(min_lr + delta *
                  (1 + math.cos(math.pi * tmp_epoch / decay_epoch)))
    return lr
Example #24
    def generate_sample(self, sample_y, generate_nums, shape):
        """
        Randomly sample from the latent space to generate samples.

        Args:
            sample_y (Tensor): Define the label of samples. Tensor of shape (generate_nums, ) and type mindspore.int32.
            generate_nums (int): The number of samples to generate.
            shape (tuple): The shape of the samples, which must be of the form (generate_nums, C, H, W) or (-1, C, H, W).

        Returns:
            Tensor, the generated samples.
        """
        generate_nums = Validator.check_positive_int(generate_nums)
        if not isinstance(shape, tuple) or len(shape) != 4 or (shape[0] != -1 and shape[0] != generate_nums):
            raise ValueError('The shape should be (generate_nums, C, H, W) or (-1, C, H, W).')
        sample_z = self.normal((generate_nums, self.latent_size), self.to_tensor(0.0), self.to_tensor(1.0), seed=0)
        sample_y = self.one_hot(sample_y)
        sample_c = self.concat((sample_z, sample_y))
        sample = self._decode(sample_c)
        sample = self.reshape(sample, shape)
        return sample
Example #25
def warmup_lr(learning_rate, total_step, step_per_epoch, warmup_epoch):
    r"""
    Get the warm-up learning rate.

    For the i-th step, the formula of computing warmup_learning_rate[i] is:

    .. math::
        warmup\_learning\_rate[i] = learning\_rate * tmp\_epoch / warmup\_epoch

    Where :math:`tmp\_epoch=min(current\_epoch, warmup\_epoch),\ current\_epoch=floor(\frac{i}{step\_per\_epoch})`

    Args:
        learning_rate (float): The initial value of learning rate.
        total_step (int): The total number of steps.
        step_per_epoch (int): The number of steps per epoch.
        warmup_epoch (int): The number of epochs over which the learning rate is warmed up.

    Returns:
        list[float]. The size of list is `total_step`.

    Examples:
        >>> learning_rate = 0.1
        >>> total_step = 6
        >>> step_per_epoch = 2
        >>> warmup_epoch = 2
        >>> output = warmup_lr(learning_rate, total_step, step_per_epoch, warmup_epoch)
        >>> print(output)
        [0.0, 0.0, 0.05, 0.05, 0.1, 0.1]
    """
    if not isinstance(learning_rate, float):
        raise TypeError("learning_rate must be float.")
    validator.check_non_negative_float(learning_rate, "learning_rate", None)
    validator.check_positive_int(warmup_epoch, 'warmup_epoch')
    validator.check_positive_int(total_step, 'total_step')
    validator.check_positive_int(step_per_epoch, 'step_per_epoch')

    function = lambda x, y: (x, min(x, y))

    lr = []
    for i in range(total_step):
        current_epoch = math.floor(i / step_per_epoch)
        warmup_epoch, tmp_epoch = function(warmup_epoch, current_epoch)
        lr.append(learning_rate * tmp_epoch / warmup_epoch)
    return lr
Example #26
    def __init__(self,
                 input_size,
                 hidden_size,
                 num_layers=1,
                 has_bias=True,
                 batch_first=False,
                 dropout=0,
                 bidirectional=False):
        super(LSTM, self).__init__()
        validator.check_value_type("batch_first", batch_first, [bool],
                                   self.cls_name)
        validator.check_positive_int(hidden_size, "hidden_size", self.cls_name)
        validator.check_positive_int(num_layers, "num_layers", self.cls_name)
        self.is_ascend = context.get_context("device_target") == "Ascend"

        self.batch_first = batch_first
        self.transpose = P.Transpose()
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.dropout = dropout
        self.lstm = P.LSTM(input_size=input_size,
                           hidden_size=hidden_size,
                           num_layers=num_layers,
                           has_bias=has_bias,
                           bidirectional=bidirectional,
                           dropout=float(dropout))

        weight_size = 0
        gate_size = 4 * hidden_size
        stdv = 1 / math.sqrt(hidden_size)
        num_directions = 2 if bidirectional else 1
        if self.is_ascend:
            self.reverse_seq = P.ReverseSequence(batch_dim=1, seq_dim=0)
            self.concat = P.Concat(axis=0)
            self.concat_2dim = P.Concat(axis=2)
            self.cast = P.Cast()
            self.shape = P.Shape()
            if dropout != 0:
                self.dropout_op = nn.Dropout(float(dropout))
            b0 = np.zeros(gate_size, dtype=np.float16)
            self.w_list = []
            self.b_list = []
            self.rnns_fw = P.DynamicRNN(forget_bias=0.0)
            self.rnns_bw = P.DynamicRNN(forget_bias=0.0)

            for layer in range(num_layers):
                w_shape = input_size if layer == 0 else (num_directions *
                                                         hidden_size)
                w_np = np.random.uniform(
                    -stdv, stdv,
                    (w_shape + hidden_size, gate_size)).astype(np.float16)
                self.w_list.append(
                    Parameter(initializer(Tensor(w_np),
                                          [w_shape + hidden_size, gate_size]),
                              name='weight_fw' + str(layer)))
                if has_bias:
                    b_np = np.random.uniform(-stdv, stdv,
                                             gate_size).astype(np.float16)
                    self.b_list.append(
                        Parameter(initializer(Tensor(b_np), [gate_size]),
                                  name='bias_fw' + str(layer)))
                else:
                    self.b_list.append(
                        Parameter(initializer(Tensor(b0), [gate_size]),
                                  name='bias_fw' + str(layer)))
                if bidirectional:
                    w_bw_np = np.random.uniform(
                        -stdv, stdv,
                        (w_shape + hidden_size, gate_size)).astype(np.float16)
                    self.w_list.append(
                        Parameter(
                            initializer(Tensor(w_bw_np),
                                        [w_shape + hidden_size, gate_size]),
                            name='weight_bw' + str(layer)))
                    b_bw_np = np.random.uniform(
                        -stdv, stdv,
                        (4 *
                         hidden_size)).astype(np.float16) if has_bias else b0
                    self.b_list.append(
                        Parameter(initializer(Tensor(b_bw_np), [gate_size]),
                                  name='bias_bw' + str(layer)))
            self.w_list = ParameterTuple(self.w_list)
            self.b_list = ParameterTuple(self.b_list)
        else:
            for layer in range(num_layers):
                input_layer_size = input_size if layer == 0 else hidden_size * num_directions
                increment_size = gate_size * input_layer_size
                increment_size += gate_size * hidden_size
                if has_bias:
                    increment_size += 2 * gate_size
                weight_size += increment_size * num_directions
            w_np = np.random.uniform(-stdv, stdv,
                                     (weight_size, 1, 1)).astype(np.float32)
            self.weight = Parameter(initializer(Tensor(w_np),
                                                [weight_size, 1, 1]),
                                    name='weight')
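
A minimal usage sketch for the public cell built by this constructor (an assumption: it is mindspore.nn.LSTM; h0 and c0 have shape (num_directions * num_layers, batch_size, hidden_size)):

import numpy as np
from mindspore import Tensor, nn

lstm = nn.LSTM(input_size=10, hidden_size=16, num_layers=1, batch_first=True)
x = Tensor(np.ones((3, 5, 10)).astype(np.float32))    # (batch_size, seq_len, input_size)
h0 = Tensor(np.zeros((1, 3, 16)).astype(np.float32))
c0 = Tensor(np.zeros((1, 3, 16)).astype(np.float32))
output, (hn, cn) = lstm(x, (h0, c0))
print(output.shape)  # expected: (3, 5, 16)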
Example #27
    def __init__(self,
                 vocab_size,
                 embedding_size,
                 field_size,
                 param_init='normal',
                 target='CPU',
                 slice_mode='batch_slice',
                 feature_num_list=None,
                 max_norm=None,
                 sparse=True,
                 operator='SUM'):
        super(MultiFieldEmbeddingLookup,
              self).__init__(vocab_size, embedding_size, param_init, target,
                             slice_mode, feature_num_list, max_norm, sparse)
        self.field_size = validator.check_positive_int(field_size,
                                                       'field_size')
        self.operator = operator

        self.mul = P.Mul()
        self.inf_mask_mul = P.Mul()
        self.bias_add = P.Add()
        self.inf_add = P.Add()
        self.merge_op = None
        self.count_op = P.UnsortedSegmentSum()
        self.abs = P.Abs()
        self.equal = P.Equal()
        self.add = P.Add()
        self.cast = P.Cast()
        self.div_no_nan = P.DivNoNan()
        self.expand = P.ExpandDims()
        self.max_mask_mul = P.Mul()
        self.max_no_equal = P.NotEqual()

        if operator == MultiFieldEmbeddingLookup.OPERATOR_SUM:
            self.merge_op = P.UnsortedSegmentSum()
        elif operator == MultiFieldEmbeddingLookup.OPERATOR_MAX:
            self.merge_op = P.UnsortedSegmentMax()
        elif operator == MultiFieldEmbeddingLookup.OPERATOR_MEAN:
            self.merge_op = P.UnsortedSegmentSum()
        else:
            raise ValueError(
                "The operator supports ['SUM', 'MAX', 'MEAN'], but found: " +
                str(operator))

        parallel_mode = _get_parallel_mode()
        is_auto_parallel = parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL,
                                             ParallelMode.AUTO_PARALLEL)
        if slice_mode in ["table_row_slice", "batch_slice"
                          ] and is_auto_parallel:
            self.merge_op.shard(
                ((get_group_size(), 1, 1), (get_group_size(), 1)))
            self.expand.shard(((get_group_size(), ), ))
            self.bias_add.shard(((1, 1), (1, 1)))
            self.mul.shard(
                ((get_group_size(), 1, 1), (get_group_size(), 1, 1)))
            self.count_op.shard(((get_group_size(), 1), (get_group_size(), 1)))
            self.add.shard(((get_group_size(), ), (get_group_size(), )))
            self.div_no_nan.shard(
                ((get_group_size(), 1), (get_group_size(), 1)))
            self.max_mask_mul.shard(
                ((get_group_size(), 1), (get_group_size(), 1)))
            self.max_no_equal.shard(((1, ), ()))
            if operator == MultiFieldEmbeddingLookup.OPERATOR_MAX:
                self.equal.shard(((get_group_size(), 1, 1), ()))
                self.inf_mask_mul.shard(((get_group_size(), 1, 1), ()))
                self.merge_op.shard(
                    ((get_group_size(), 1), (get_group_size(), )))
                self.count_op.shard(
                    ((get_group_size(), ), (get_group_size(), )))
                self.inf_add.shard(
                    ((get_group_size(), 1, 1), (get_group_size(), 1, 1)))
        elif slice_mode == "table_column_slice" and is_auto_parallel:
            self.merge_op.shard(((1, 1, get_group_size()), (1, 1)))
            self.div_no_nan.shard(((1, get_group_size()), (1, 1)))
            self.bias_add.shard(((1, 1), (1, 1)))
            self.mul.shard(((1, 1, 1), (1, 1, get_group_size())))
            self.count_op.shard(((1, 1), (1, 1)))
            self.add.shard(((1, ), (1, )))
            self.max_mask_mul.shard(((1, get_group_size()), (1, 1)))
            self.expand.shard(((1, ), ))
            self.max_no_equal.shard(((1, ), ()))
            if operator == MultiFieldEmbeddingLookup.OPERATOR_MAX:
                self.equal.shard(((1, 1, 1), ()))
                self.inf_mask_mul.shard(((1, 1, 1), ()))
                self.merge_op.shard(((1, get_group_size()), (1, )))
                self.count_op.shard(((1, ), (1, )))
                self.inf_add.shard(((1, 1, get_group_size()), (1, 1, 1)))
        else:
            if is_auto_parallel:
                raise ValueError(
                    "slice_mode should be  ['table_row_slice', 'batch_slice' and \
                       'table_column_slice'], but get " + str(slice_mode))

        # Min value for fp32
        self.negative_inf_value = -3.402823466E+38
Example #28
 def __init__(self,
              vocab_size,
              embedding_size,
              param_init='normal',
              target='CPU',
              slice_mode='batch_slice',
              manual_shapes=None,
              max_norm=None,
              sparse=True,
              vocab_cache_size=0):
     super(EmbeddingLookup, self).__init__()
     validator.check_value_type('sparse', sparse, [bool], self.cls_name)
     self.vocab_size = validator.check_positive_int(vocab_size,
                                                    'vocab_size')
     self.vocab_cache_size = validator.check_non_negative_int(
         vocab_cache_size, 'vocab_cache_size')
     self.target = target
     self.sparse = sparse
     self.cache_enable = self.vocab_cache_size > 0
     self.forward_unique = False
     if target not in ('CPU', 'DEVICE'):
         raise ValueError(
             'Attr \'target\' of \'EmbeddingLookup\' Op passed ' +
             str(target) +
             ', should be one of values in \'CPU\', \'DEVICE\'.')
     if not sparse and target == 'CPU':
         raise ValueError(
             'When target is CPU, embedding_lookup must be sparse.')
     if sparse:
         self.gatherv2 = P.SparseGatherV2()
     else:
         self.gatherv2 = P.Gather()
     self.embeddinglookup = P.EmbeddingLookup().add_prim_attr(
         'primitive_target', 'CPU')
     enable_ps = _get_ps_context("enable_ps")
     if enable_ps:
         self._process_vocab_cache(slice_mode)
     self.embedding_size = validator.check_positive_int(
         embedding_size, 'embedding_size')
     self.embedding_table = Parameter(initializer(
         param_init, [self.vocab_size, self.embedding_size]),
                                      name='embedding_table')
     parallel_mode = _get_parallel_mode()
     is_auto_parallel = parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL,
                                          ParallelMode.AUTO_PARALLEL)
     self.gather_revert = P.Gather()
     self.reshape_first = P.Reshape()
     self.reshape = P.Reshape()
     self.unique = P.Unique()
     self.shape = P.Shape()
     if is_auto_parallel:
         self.unique = P.Unique().shard(((1, ), ))
     if self.cache_enable and enable_ps:
         self._set_voacb_cache_enable_for_ps(vocab_cache_size,
                                             embedding_size, vocab_size)
         if is_auto_parallel:
             self.unique.add_prim_attr('cache_enable', True)
     indices_shape_size = 2
     if slice_mode == "field_slice" and is_auto_parallel:
         if not manual_shapes:
             raise ValueError(
                 "in slice field mode, the manual_shapes should not be none"
             )
         if not isinstance(manual_shapes, tuple):
             raise TypeError(
                 "manual_shapes type must be tuple(int) cannot be {}!".
                 format(type(manual_shapes)))
         for dim in manual_shapes:
             validator.check_positive_int(dim, 'manual shape dim',
                                          self.cls_name)
         self.gatherv2.add_prim_attr("manual_split", manual_shapes)
         self.embeddinglookup.add_prim_attr("manual_split", manual_shapes)
         self.gatherv2.shard(((get_group_size(), 1), (1, get_group_size())))
         self.embeddinglookup.shard(
             ((get_group_size(), 1), (1, get_group_size())))
     elif slice_mode == "table_row_slice" and is_auto_parallel:
         full_batch = _get_full_batch()
         if (target == 'DEVICE'
                 and not full_batch) or (self.cache_enable and enable_ps
                                         and sparse):
             indices_shape_size = 1
             self.gather_revert.shard(((1, 1), (get_group_size(), )))
             self.forward_unique = True
         indices_strategy = (1, ) * indices_shape_size
         self.gatherv2.shard(((get_group_size(), 1), indices_strategy))
         self.embeddinglookup.shard(
             ((get_group_size(), 1), indices_strategy))
     elif slice_mode == "table_column_slice" and is_auto_parallel:
         if target == 'DEVICE':
             indices_shape_size = 1
             self.gather_revert.shard(((1, get_group_size()), (1, )))
             self.forward_unique = True
         indices_strategy = (1, ) * indices_shape_size
         self.gatherv2.shard(((1, get_group_size()), indices_strategy))
         self.embeddinglookup.shard(
             ((1, get_group_size()), indices_strategy))
     elif slice_mode == "batch_slice" and is_auto_parallel:
         indices_strategy = [get_group_size()]
         indices_strategy.extend([1] * (indices_shape_size - 1))
         indices_strategy = tuple(indices_strategy)
         self.gatherv2.shard(((1, 1), indices_strategy))
         self.embeddinglookup.shard(((1, 1), indices_strategy))
     else:
         if is_auto_parallel:
             raise ValueError(
                 "slice_mode should support mode in nn.EmbeddingLookup, but get "
                 + str(slice_mode))
     if self.cache_enable and not enable_ps:
         if parallel_mode != ParallelMode.STAND_ALONE:
             raise ValueError(
                 "parallel mode haven't supported cache enable yet.")
         self._set_cache_enable()
     self.embedding_table.unique = self.forward_unique
     self.max_norm = max_norm
     if self.max_norm is not None:
         self.max_norm = validator.check_positive_float(
             self.max_norm, 'max_norm', self.cls_name)
         self.max_norm = Tensor(self.max_norm, dtype=mstype.float32)
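
A minimal usage sketch (an assumption: the snippet above is the mindspore.nn.EmbeddingLookup cell; the output appends embedding_size to the shape of the index tensor):

import numpy as np
from mindspore import Tensor, nn
from mindspore import dtype as mstype

embedding = nn.EmbeddingLookup(vocab_size=10, embedding_size=4)
indices = Tensor(np.array([[1, 3], [4, 9]]), mstype.int32)
print(embedding(indices).shape)  # expected: (2, 2, 4)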
Example #29
    def __init__(self,
                 num_features,
                 eps=1e-5,
                 momentum=0.9,
                 affine=True,
                 gamma_init='ones',
                 beta_init='zeros',
                 moving_mean_init='zeros',
                 moving_var_init='ones',
                 use_batch_statistics=None,
                 device_num_each_group=1,
                 input_dims='2d',
                 data_format='NCHW'):
        super(_BatchNorm, self).__init__()
        if num_features < 1:
            raise ValueError("num_features must be at least 1")

        if momentum < 0 or momentum > 1:
            raise ValueError("momentum should be a number in range [0, 1], but got {}".format(momentum))
        self.format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.cls_name)
        if context.get_context("device_target") != "GPU" and self.format == "NHWC":
            raise ValueError("NHWC format only support in GPU target.")
        self.use_batch_statistics = use_batch_statistics
        self.num_features = num_features
        self.eps = eps
        self.input_dims = input_dims
        self.moving_mean = Parameter(initializer(
            moving_mean_init, num_features), name="mean", requires_grad=False)
        self.moving_variance = Parameter(initializer(
            moving_var_init, num_features), name="variance", requires_grad=False)
        self.gamma = Parameter(initializer(
            gamma_init, num_features), name="gamma", requires_grad=affine)
        self.beta = Parameter(initializer(
            beta_init, num_features), name="beta", requires_grad=affine)
        self.group = validator.check_positive_int(device_num_each_group)
        self.is_global = False
        if self.group != 1:
            self.rank_id = get_rank()
            self.rank_size = get_group_size()
            self.device_list = [i for i in range(0, self.rank_size)]
            self.rank_list = self.list_group(self.device_list, self.group)
            self.rank_list_idx = len(self.rank_list)
            for i in range(self.rank_list_idx):
                if self.rank_id in self.rank_list[i] and self.group != 1:
                    self.is_global = True
                    management.create_group('group' + str(i), self.rank_list[i])
                    self.all_reduce = P.AllReduce(P.ReduceOp.SUM, 'group' + str(i)).add_prim_attr('fusion', 1)
        self.shape = P.Shape()
        self.reduce_mean = P.ReduceMean(keep_dims=True)
        self.square = P.Square()
        self.sqrt = P.Sqrt()
        self.cast = P.Cast()
        self.dtype = P.DType()
        self.reshape = P.Reshape()
        self.is_ascend = context.get_context("device_target") == "Ascend"
        self.is_gpu = context.get_context("device_target") == "GPU"
        self.is_graph_mode = context.get_context("mode") == context.GRAPH_MODE
        self.momentum = 1.0 - momentum
        if context.get_context("enable_ge"):
            self.is_ge_backend = True
        else:
            self.is_ge_backend = False

        if self.is_graph_mode and (self.is_ge_backend or self.is_ascend):
            self.bn_train = P.BatchNorm(is_training=True,
                                        epsilon=self.eps)
        elif self.is_gpu:
            self.bn_train = P.FusedBatchNormEx(mode=1,
                                               epsilon=self.eps,
                                               momentum=self.momentum,
                                               data_format=self.format)
        else:
            self.bn_train = P.FusedBatchNorm(mode=1,
                                             epsilon=self.eps,
                                             momentum=self.momentum)
        self.bn_infer = P.BatchNorm(is_training=False, epsilon=self.eps, data_format=self.format)
        self.enable_global_sync = self.is_global and (self.is_ge_backend or (self.is_graph_mode and self.is_ascend))
        self.enable_default_train = self.is_graph_mode and not self.is_global and \
                                    (self.is_ge_backend or self.is_ascend)

        data_parallel_strategy = ((1,), (1,))
        data_parallel_strategy_one = ((1,), ())
        self.sub_mean = P.Sub().shard(data_parallel_strategy)
        self.sub_var = P.Sub().shard(data_parallel_strategy)
        self.mul_mean = P.Mul().shard(data_parallel_strategy_one)
        self.mul_var = P.Mul().shard(data_parallel_strategy_one)
        self.assign_sub_mean = P.AssignSub().shard(data_parallel_strategy)
        self.assign_sub_var = P.AssignSub().shard(data_parallel_strategy)
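
The public batch-norm cells wrap this base class; a minimal usage sketch, assuming mindspore.nn.BatchNorm2d (which forwards num_features, eps and momentum to _BatchNorm):

import numpy as np
from mindspore import Tensor, nn

bn = nn.BatchNorm2d(num_features=3, eps=1e-5, momentum=0.9)
bn.set_train(False)                                   # inference: use moving statistics
x = Tensor(np.ones((2, 3, 4, 4)).astype(np.float32))
print(bn(x).shape)  # expected: (2, 3, 4, 4)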
Example #30
def polynomial_decay_lr(learning_rate,
                        end_learning_rate,
                        total_step,
                        step_per_epoch,
                        decay_epoch,
                        power,
                        update_decay_epoch=False):
    r"""
    Calculate learning rate based on the polynomial decay function.

    For the i-th step, the formula of computing decayed_learning_rate[i] is:

    .. math::
        decayed\_learning\_rate[i] = (learning\_rate - end\_learning\_rate) *
        (1 - tmp\_epoch / tmp\_decay\_epoch)^{power} + end\_learning\_rate

    Where:

    .. math::
        tmp\_epoch = min(current\_epoch, decay\_epoch)

    .. math::
        current\_epoch=floor(\frac{i}{step\_per\_epoch})

    .. math::
        tmp\_decay\_epoch = decay\_epoch

    If `update_decay_epoch` is true, update the value of `tmp_decay_epoch` every epoch. The formula is:

    .. math::
        tmp\_decay\_epoch = decay\_epoch * ceil(current\_epoch / decay\_epoch)

    Args:
        learning_rate (float): The initial value of learning rate.
        end_learning_rate (float): The end value of learning rate.
        total_step (int): The total number of steps.
        step_per_epoch (int): The number of steps per epoch.
        decay_epoch (int): A value used to calculate decayed learning rate.
        power (float): A value used to calculate decayed learning rate. This parameter must be greater than 0.
        update_decay_epoch (bool): If true, update `decay_epoch`. Default: False.

    Returns:
        list[float]. The size of list is `total_step`.

    Examples:
        >>> learning_rate = 0.1
        >>> end_learning_rate = 0.01
        >>> total_step = 6
        >>> step_per_epoch = 2
        >>> decay_epoch = 2
        >>> power = 0.5
        >>> r = polynomial_decay_lr(learning_rate, end_learning_rate, total_step, step_per_epoch, decay_epoch, power)
        >>> print(r)
        [0.1, 0.1, 0.07363961030678928, 0.07363961030678928, 0.01, 0.01]
    """
    validator.check_positive_float(learning_rate, 'learning_rate')
    validator.check_is_float(learning_rate, 'learning_rate')
    if not isinstance(end_learning_rate, float):
        raise TypeError("end_learning_rate must be float.")
    validator.check_non_negative_float(end_learning_rate, "end_learning_rate",
                                       None)
    validator.check_positive_float(power, 'power')
    validator.check_is_float(power, 'power')
    validator.check_positive_int(total_step, 'total_step')
    validator.check_positive_int(step_per_epoch, 'step_per_epoch')
    validator.check_positive_int(decay_epoch, 'decay_epoch')
    validator.check_value_type('update_decay_epoch', update_decay_epoch,
                               [bool])

    origin_decay_epoch = decay_epoch
    function = lambda x, y: (x, min(x, y))
    if update_decay_epoch:
        function = lambda x, y: (origin_decay_epoch * max(
            math.ceil(y / origin_decay_epoch), 1), y)

    lr = []
    delta = learning_rate - end_learning_rate
    for i in range(total_step):
        current_epoch = math.floor(i / step_per_epoch)
        decay_epoch, tmp_epoch = function(decay_epoch, current_epoch)
        lr.append(delta * (1 - tmp_epoch / decay_epoch)**power +
                  end_learning_rate)
    return lr