def __init__(self, config):
     """
     The embedding lookup table for vocabulary
     Args:
         config(PANGUALPHAConfig): the config of network
     Inputs:
         input_ids: the tokenized inputs with datatype int32
     Returns:
         output: Tensor, the embedding vector for the input with shape (batch_size, seq_length, embedding_size)
         self.embedding_table: Tensor, the embedding table for the vocabulary
     """
     super(EmbeddingLookup, self).__init__()
     self.vocab_size = config.vocab_size
     self.embedding_size = config.embedding_size
     if config.load_ckpt_path:
         # Loading the embedding table from the ckpt path:
         embedding_path = os.path.join(config.load_ckpt_path, 'word_embedding.npy')
         if os.path.exists(embedding_path):
             e_table = np.load(embedding_path)
             e_table = Tensor(e_table, mstype.float32)
             self.embedding_table = Parameter(e_table, name="embedding_table")
         else:
              raise ValueError(f"{embedding_path} does not exist, please check whether the word_embedding file exists.")
     else:
         self.embedding_table = Parameter(initializer(
             Normal(0.02), [self.vocab_size, self.embedding_size]),
                                          name="embedding_table")
     if config.word_emb_dp:
         self.gather = P.GatherV2().shard(((1, 1), (config.dp, 1)))
     else:
         self.gather = P.GatherV2().shard(((config.mp, 1), (1, 1)))
         self.gather.add_prim_attr("repeated_calc_num_direction", "left")
         if config.forward_reduce_scatter:
             self.gather.add_prim_attr("forward_type", "ReduceScatter")
     self.shape = (-1, config.seq_length, config.embedding_size)
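For reference, a minimal sketch of the forward pass that would pair with this __init__ (not the original construct; it assumes the gather-then-reshape pattern implied by the attributes and the docstring above, and reuses the inline P.Reshape()(...) style seen in Example #24 below):

def construct(self, input_ids):
    # Flatten the token ids, look up the corresponding rows of the table,
    # then restore (batch_size, seq_length, embedding_size) via self.shape.
    flat_ids = P.Reshape()(input_ids, (-1,))
    output = self.gather(self.embedding_table, flat_ids, 0)
    output = P.Reshape()(output, self.shape)
    return output, self.embedding_table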
Example #2
    def __init__(self, config):
        super(WideDeepModel, self).__init__()
        self.batch_size = config.batch_size
        parallel_mode = _get_parallel_mode()
        if parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL):
            self.batch_size = self.batch_size * get_group_size()
        self.field_size = config.field_size
        self.vocab_size = config.vocab_size
        self.emb_dim = config.emb_dim
        self.deep_layer_dims_list = config.deep_layer_dim
        self.deep_layer_act = config.deep_layer_act
        self.init_args = config.init_args
        self.weight_init, self.bias_init = config.weight_bias_init
        self.weight_bias_init = config.weight_bias_init
        self.emb_init = config.emb_init
        self.drop_out = config.dropout_flag
        self.keep_prob = config.keep_prob
        self.deep_input_dims = self.field_size * self.emb_dim
        self.layer_dims = self.deep_layer_dims_list + [1]
        self.all_dim_list = [self.deep_input_dims] + self.layer_dims

        init_acts = [('Wide_w', [self.vocab_size, 1], self.emb_init),
                     ('V_l2', [self.vocab_size, self.emb_dim], self.emb_init),
                     ('Wide_b', [1], self.emb_init)]
        var_map = init_var_dict(self.init_args, init_acts)
        self.wide_w = var_map["Wide_w"]
        self.wide_b = var_map["Wide_b"]
        self.embedding_table = var_map["V_l2"]
        self.dense_layer_1 = DenseLayer(self.all_dim_list[0],
                                        self.all_dim_list[1],
                                        self.weight_bias_init,
                                        self.deep_layer_act, convert_dtype=True)
        self.dense_layer_2 = DenseLayer(self.all_dim_list[1],
                                        self.all_dim_list[2],
                                        self.weight_bias_init,
                                        self.deep_layer_act, convert_dtype=True)
        self.dense_layer_3 = DenseLayer(self.all_dim_list[2],
                                        self.all_dim_list[3],
                                        self.weight_bias_init,
                                        self.deep_layer_act, convert_dtype=True)
        self.dense_layer_4 = DenseLayer(self.all_dim_list[3],
                                        self.all_dim_list[4],
                                        self.weight_bias_init,
                                        self.deep_layer_act, convert_dtype=True)
        self.dense_layer_5 = DenseLayer(self.all_dim_list[4],
                                        self.all_dim_list[5],
                                        self.weight_bias_init,
                                        self.deep_layer_act, convert_dtype=True)

        self.gather_v2 = P.GatherV2().set_strategy(((1, 8), (1, 1)))
        self.gather_v2_1 = P.GatherV2()
        self.mul = P.Mul()
        self.reduce_sum = P.ReduceSum(keep_dims=False)
        self.reshape = P.Reshape()
        self.square = P.Square()
        self.shape = P.Shape()
        self.tile = P.Tile()
        self.concat = P.Concat(axis=1)
        self.cast = P.Cast()
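A hedged sketch of how these pieces are typically combined in the forward pass (a reconstruction from the attributes above, not the verbatim source; the parameter names id_hldr for feature ids and wt_hldr for their weights are assumptions):

    def construct(self, id_hldr, wt_hldr):
        mask = self.reshape(wt_hldr, (self.batch_size, self.field_size, 1))
        # Wide part: per-id scalar weights, masked and summed per sample.
        wide_id_weight = self.gather_v2_1(self.wide_w, id_hldr, 0)
        wide_out = self.reshape(self.reduce_sum(self.mul(wide_id_weight, mask), 1) + self.wide_b, (-1, 1))
        # Deep part: per-id embeddings, masked, flattened, then the 5-layer MLP.
        deep_id_embs = self.gather_v2(self.embedding_table, id_hldr, 0)
        deep_in = self.reshape(self.mul(deep_id_embs, mask), (-1, self.field_size * self.emb_dim))
        deep_in = self.dense_layer_1(deep_in)
        deep_in = self.dense_layer_2(deep_in)
        deep_in = self.dense_layer_3(deep_in)
        deep_in = self.dense_layer_4(deep_in)
        deep_out = self.dense_layer_5(deep_in)
        return wide_out, deep_out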
Example #3
    def __init__(self,
                 vocab_size,
                 embedding_size,
                 embedding_shape,
                 use_one_hot_embeddings=False,
                 initializer_range=0.02,
                 batch_size=12,
                 damping=0.03,
                 loss_scale=1,
                 frequency=100,
                 ):
        super(Embedding_Thor, self).__init__()
        self.vocab_size = vocab_size
        self.use_one_hot_embeddings = use_one_hot_embeddings
        self.embedding_table = Parameter(initializer
                                         (TruncatedNormal(initializer_range),
                                          [vocab_size, embedding_size]))
        self.thor = True
        self.expand = P.ExpandDims()
        self.shape_flat = (-1,)
        self.gather = P.GatherV2()
        self.one_hot = P.OneHot()
        self.on_value = Tensor(1.0, mstype.float32)
        self.off_value = Tensor(0.0, mstype.float32)
        self.array_mul = P.MatMul()
        self.reshape = P.Reshape()
        self.em_shape = tuple(embedding_shape)
        self.shape = P.Shape()
        self.loss_scale = Tensor(1 / loss_scale, mstype.float16)

        self.matrix_A_inv = Parameter(Tensor(np.zeros([vocab_size]).astype(np.float16)), requires_grad=False)
        self.matrix_G_inv = Parameter(Tensor(np.zeros([embedding_size, embedding_size]).astype(np.float16)),
                                      requires_grad=False)
        self.fake_G = Tensor(np.zeros([embedding_size, embedding_size]).astype(np.float16))
        self.dampingA = Tensor(np.ones([vocab_size]).astype(np.float32))
        self.dampingG = Tensor(np.identity(embedding_size), mstype.float32)
        self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
        self.freq = Tensor(frequency, mstype.int32)
        self.axis = 0
        self.damping = damping
        self.gather = P.GatherV2()
        self.sqrt = P.Sqrt()
        self.mul = P.Mul()
        self.cast = P.Cast()
        self.cube_matmul = P.CusMatMulCube(transpose_a=True)
        self.vector_matmul = P.CusBatchMatMul()
        self.cholesky = P.CusCholeskyTrsm()
        self.matrix_combine = P.CusMatrixCombine()
        self.reduce_sum = P.ReduceSum(keep_dims=False)
        self.inv = P.Inv()
        self.getG = P.InsertGradientOf(self.save_gradient)
        self.batch_size = batch_size
Example #4
 def __init__(self, config):
     super(EmbeddingLookupPipeline, self).__init__()
     self.vocab_size = config.vocab_size
     self.embedding_size = config.embedding_size
     if config.word_emb_dp:
         self.gather = P.GatherV2().shard(((1, 1), (config.dp, 1)))
     else:
         self.gather = P.GatherV2().shard(((config.mp, 1), (1, 1)))
         self.gather.add_prim_attr("repeated_calc_num_direction", "left")
         if config.forward_reduce_scatter:
             self.gather.add_prim_attr("forward_type", "ReduceScatter")
     self.gather.add_prim_attr("begin", 0)
     self.shape = (-1, config.seq_length, config.embedding_size)
Example #5
 def __init__(self):
     super().__init__()
     self.unique = P.Unique().shard(((1, ), ))
     self.relu = P.ReLU()
     self.mul = P.Mul()
     self.embedding_lookp = P.GatherV2().shard(((8, 1), (1, )))
     self.embedding_table = Parameter(initializer(
         'normal', [2000, 128]),
                                      name='embedding_table')
     self.gatherv2 = P.GatherV2().shard(((1, 1), (8, )))
     self.reshape = P.Reshape()
     self.matmul = P.MatMul()
     self.mul_weight = Parameter(Tensor(
         np.full([32, 64, 1], 0.5, dtype=np.float32)),
                                 name="mul_weight")
Example #6
 def __init__(self, num_sampled, num_classes, num_true=1,
              sampled_values=None, remove_accidental_hits=True, seed=0,
              reduction='none'):
     super(SampledSoftmaxLoss, self).__init__()
     self.num_sampled = num_sampled
     self.num_classes = num_classes
     self.num_true = num_true
     self.sampled_values = sampled_values
     self.remove_accidental_hits = remove_accidental_hits
     self.seed = seed
     self.sampler = P.UniformSampler(
         num_true,
         num_sampled,
         True,
         num_classes,
         seed,
         remove_accidental_hits)
     self.cast = P.Cast()
     self.reshape = P.Reshape()
     self.shape = P.Shape()
     self.exp = P.Exp()
     self.log = P.Log()
     self.slice_op = P.Slice()
     self.matmul = P.MatMul(False, True)
     self.gather_v2 = P.GatherV2()
     self.reduce_max_true = P.ReduceMax(True)
     self.reduce_sum = P.ReduceSum()
     self.reduce_sum_true = P.ReduceSum(True)
     self.concat_dim0 = P.Concat(0)
     self.concat_dim1 = P.Concat(1)
     self.ones_like = P.OnesLike()
     self.zeros_like = P.ZerosLike()
     self.mul = P.Mul()
     self.expand_dims = P.ExpandDims()
Example #7
    def __init__(self,
                 in_channels,
                 out_channels,
                 weight_init='normal',
                 bias_init='zeros',
                 damping=0.03,
                 loss_scale=1,
                 frequency=278,
                 batch_size=32,
                 has_bias=True,
                 activation=None):
        super(Dense_Thor_GPU, self).__init__()
        self.in_channels = Validator.check_positive_int(in_channels)
        self.out_channels = Validator.check_positive_int(out_channels)
        self.has_bias = Validator.check_bool(has_bias)
        self.thor = True
        if isinstance(weight_init, Tensor):
            if weight_init.ndim != 2 or weight_init.shape[0] != out_channels or \
                    weight_init.shape[1] != in_channels:
                raise ValueError("weight_init shape error")

        self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]))

        if self.has_bias:
            if isinstance(bias_init, Tensor):
                if bias_init.ndim != 1 or bias_init.shape[0] != out_channels:
                    raise ValueError("bias_init shape error")

            self.bias = Parameter(initializer(bias_init, [out_channels]))

        self.matmul = P.MatMul(transpose_b=True)
        self.bias_add = P.BiasAdd()

        self.activation = get_activation(activation)
        self.activation_flag = self.activation is not None
        split_dim = 128
        matrix_A_shape, matrix_G_shape = caculate_matmul_shape(self.in_channels, self.out_channels, split_dim)
        self.matrix_A_inv = Parameter(Tensor(np.zeros(matrix_A_shape).astype(np.float32)), requires_grad=False)
        self.matrix_G_inv = Parameter(Tensor(np.zeros(matrix_G_shape).astype(np.float32)), requires_grad=False)
        self.broadcast_to = P.BroadcastTo(matrix_A_shape)
        self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
        self.shape = P.Shape()
        self.reshape = P.Reshape()
        self.transpose = P.Transpose()
        self.mul = P.Mul()
        self.cube_matmul = P.MatMul(transpose_a=True)
        self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
        self.batch_size = Tensor(batch_size, mstype.float16)
        self.getG = P.InsertGradientOf(self.save_gradient)
        self.damping = Parameter(Tensor(damping), requires_grad=False)
        self.dampingA = Tensor(np.identity(in_channels), mstype.float32)
        self.dampingG = Tensor(np.identity(out_channels), mstype.float32)
        self.cast = P.Cast()
        self.gather = P.GatherV2()
        self.freq = Tensor(frequency, mstype.int32)
        self.axis = 0
        self.add = P.TensorAdd()
        self.sqrt = P.Sqrt()
        self.cholesky = P.CholeskyTrsm(split_dim=split_dim)
        self.vector_matmul = P.BatchMatMul(transpose_a=True)
Example #8
 def __init__(self, vocab_size, embedding_size, use_one_hot=False, embedding_table='normal',
              dtype=mstype.float32, padding_idx=None):
     super(Embedding, self).__init__()
     self.vocab_size = validator.check_value_type('vocab_size', vocab_size, [int], self.cls_name)
     self.embedding_size = validator.check_value_type('embedding_size', embedding_size, [int], self.cls_name)
     validator.check_value_type('use_one_hot', use_one_hot, [bool], self.cls_name)
     validator.check_subclass("dtype", dtype, mstype.number_type, self.cls_name)
     self.use_one_hot = use_one_hot
     self.dtype = dtype
     self.init_tensor = initializer(embedding_table, [vocab_size, embedding_size])
     self.padding_idx = padding_idx
     if padding_idx is not None:
         self.padding_idx = validator.check_int_range(padding_idx, 0, vocab_size, Rel.INC_BOTH,
                                                      "padding_idx", self.cls_name)
         if isinstance(self.init_tensor, Tensor) and self.init_tensor.init is not None:
             self.init_tensor = self.init_tensor.init_data()
         self.init_tensor = self.init_tensor.asnumpy()
         self.init_tensor[self.padding_idx] = 0
         self.init_tensor = Tensor(self.init_tensor)
     self.embedding_table = Parameter(self.init_tensor, name='embedding_table')
     self.expand = P.ExpandDims()
     self.reshape_flat = P.Reshape()
     self.shp_flat = (-1,)
     self.gather = P.GatherV2()
     self.one_hot = P.OneHot()
     self.on_value = Tensor(1.0, self.dtype)
     self.off_value = Tensor(0.0, self.dtype)
     self.array_mul = P.MatMul()
     self.reshape = P.Reshape()
     self.get_shp = P.Shape()
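A hedged sketch of the lookup this layer typically performs (reconstructed from the attributes above, not quoted from the source): either a one-hot matmul against the table or a direct gather.

 def construct(self, ids):
     out_shape = self.get_shp(ids) + (self.embedding_size,)
     flat_ids = self.reshape_flat(ids, self.shp_flat)
     if self.use_one_hot:
         # One-hot path: build (N, vocab_size) one-hot rows, multiply by the table.
         one_hot_ids = self.one_hot(flat_ids, self.vocab_size, self.on_value, self.off_value)
         output_flat = self.array_mul(one_hot_ids, self.embedding_table)
     else:
         # Gather path: pick rows of the table directly by id.
         output_flat = self.gather(self.embedding_table, flat_ids, 0)
     return self.reshape(output_flat, out_shape)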
Example #9
    def __init__(self, batch_size=4):
        super(DiceLoss, self).__init__()

        self.threshold0 = Tensor(0.5, mstype.float32)
        self.zero_float32 = Tensor(0.0, mstype.float32)
        self.k = int(640 * 640)
        self.negative_one_int32 = Tensor(-1, mstype.int32)
        self.batch_size = batch_size
        self.concat = P.Concat()
        self.less_equal = P.LessEqual()
        self.greater = P.Greater()
        self.reduce_sum = P.ReduceSum()
        self.reduce_sum_keep_dims = P.ReduceSum(keep_dims=True)
        self.reduce_mean = P.ReduceMean()
        self.reduce_min = P.ReduceMin()
        self.cast = P.Cast()
        self.minimum = P.Minimum()
        self.expand_dims = P.ExpandDims()
        self.select = P.Select()
        self.fill = P.Fill()
        self.topk = P.TopK(sorted=True)
        self.shape = P.Shape()
        self.sigmoid = P.Sigmoid()
        self.reshape = P.Reshape()
        self.slice = P.Slice()
        self.logical_and = P.LogicalAnd()
        self.logical_or = P.LogicalOr()
        self.equal = P.Equal()
        self.zeros_like = P.ZerosLike()
        self.add = P.TensorAdd()
        self.gather = P.GatherV2()
Example #10
    def __init__(self, config):
        super(GetMaskedLMOutput, self).__init__()
        self.width = config.hidden_size
        self.reshape = P.Reshape()
        self.gather = P.GatherV2()

        weight_init = TruncatedNormal(config.initializer_range)
        self.dense = Dense_Thor(in_channels=self.width,
                                out_channels=config.hidden_size,
                                weight_init=weight_init,
                                has_bias=True,
                                bias_init='zeros',
                                damping=damping,
                                loss_scale=loss_scale,
                                frequency=frequency,
                                activation=config.hidden_act,
                                batch_size=batch_size).to_float(
                                    config.compute_type)
        self.layernorm = nn.LayerNorm(
            (config.hidden_size, )).to_float(config.compute_type)
        self.output_bias = Parameter(initializer('zero', config.vocab_size),
                                     name='output_bias')
        self.matmul = P.MatMul(transpose_b=True)
        self.log_softmax = nn.LogSoftmax(axis=-1)
        self.shape_flat_offsets = (-1, 1)
        self.rng = Tensor(
            np.array(range(0, config.batch_size)).astype(np.int32))
        self.last_idx = (-1, )
        self.shape_flat_sequence_tensor = (config.batch_size *
                                           config.seq_length, self.width)
        self.seq_length_tensor = Tensor(
            np.array((config.seq_length, )).astype(np.int32))
        self.cast = P.Cast()
        self.compute_type = config.compute_type
        self.dtype = config.dtype
Example #11
 def __init__(self, params, learning_rate, momentum, weight_decay=0.0, loss_scale=1.0,
              decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in x.name):
     super(Momentum, self).__init__(learning_rate, params)
     if isinstance(momentum, float) and momentum < 0.0:
         raise ValueError("momentum should be at least 0.0, but got momentum {}".format(momentum))
     if isinstance(learning_rate, Iterable) or \
             (isinstance(learning_rate, Tensor) and learning_rate.dim() == 1):
         self.dynamic_lr = True
         self.gather = P.GatherV2()
         self.assignadd = P.AssignAdd()
         self.global_step = Parameter(initializer(0, [1], mstype.int32), name="global_step")
         self.axis = 0
     else:
         self.dynamic_lr = False
         self.gather = None
         self.assignadd = None
         self.global_step = None
         self.axis = None
     self.momentum = Parameter(momentum, name="momentum")
     self.params = self.parameters
     self.moments = self.params.clone(prefix="moments", init='zeros')
     self.decay_tf = tuple(decay_filter(x) for x in self.parameters)
     self.hyper_map = C.HyperMap()
     self.opt = P.ApplyMomentum()
     self.weight_decay = weight_decay * loss_scale
     self.reciprocal_scale = 1.0 / loss_scale
     self.one = Tensor(1, mstype.int32)
Example #12
    def __init__(self,
                 length,
                 depth,
                 max_relative_position,
                 initializer_range,
                 use_one_hot_embeddings=False):
        super(RelaPosEmbeddingsGenerator, self).__init__()
        self.depth = depth
        self.vocab_size = max_relative_position * 2 + 1
        self.use_one_hot_embeddings = use_one_hot_embeddings

        self.embeddings_table = Parameter(
            initializer(TruncatedNormal(initializer_range),
                        [self.vocab_size, self.depth]),
            name='embeddings_for_position')

        self.relative_positions_matrix = RelaPosMatrixGenerator(length=length,
                                                                max_relative_position=max_relative_position)
        self.reshape = P.Reshape()
        self.one_hot = P.OneHot()
        self.on_value = Tensor(1.0, mstype.float32)
        self.off_value = Tensor(0.0, mstype.float32)
        self.shape = P.Shape()
        self.gather = P.GatherV2()  # index_select
        self.matmul = P.BatchMatMul()
Example #13
 def __init__(self, params, learning_rate, momentum, matrix_A, matrix_G, weight_decay=0.0,
              loss_scale=1.0, num_hidden_layers=24, batch_size=12, damping=0.03,
              decay_filter=lambda x: 'layernorm' not in x.name.lower() and 'bias' not in x.name.lower()):
     super(THOR, self).__init__(learning_rate, params, weight_decay, loss_scale)
     if isinstance(momentum, float) and momentum < 0.0:
         raise ValueError("momentum should be at least 0.0, but got momentum {}".format(momentum))
     self.momentum = Parameter(Tensor(momentum, mstype.float32), name="momentum")
     self.params = self.parameters
     self.moments = self.params.clone(prefix="moments", init='zeros')
     self.hyper_map = C.HyperMap()
     self.opt = P.ApplyMomentum()
     self.matrix_A = ParameterTuple(matrix_A)
     self.matrix_G = ParameterTuple(matrix_G)
     self.matmul = P.MatMul()
     self.transpose = P.Transpose()
     self.shape = P.Shape()
     self.reshape = P.Reshape()
     self.mul = P.Mul()
     self.gather = P.GatherV2()
     self.matrix_A_inv = ()
     self.matrix_G_inv = ()
     self.num_hidden_layers = num_hidden_layers
     self.sqrt = P.Sqrt()
     self.assign = P.Assign()
     self.cast = P.Cast()
     self.thor = True
     self.weight_decay = weight_decay * loss_scale
     self.decay_flags = tuple(decay_filter(x) for x in self.parameters)
     self.expand = P.ExpandDims()
     self.square = P.Square()
     self.inv = P.Inv()
     self.batch_size = batch_size
     self.damping = damping
     self.one = Tensor(1, mstype.int32)
     self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
Example #14
 def __init__(self):
     super(InsertGradientNet, self).__init__()
     self.gather = P.GatherV2()
     self.damping = Tensor(np.array([0.03, 0.03], np.float32))
     self.cov_step = Parameter(0, name="cov_step", requires_grad=False)
     self.freq = Tensor(278, ms.int32)
     self.getG = P.InsertGradientOf(self.save_gradient)
Example #15
 def __init__(self,
              strategy1=None,
              strategy2=None,
              strategy3=None,
              axis=0,
              init_flag=True,
              split_tuple=(4, 4),
              split_string="manual_split",
              param_shape=(8, 8)):
     super().__init__()
     self.gatherv2 = P.GatherV2().shard(strategy1)
     self.gatherv2.add_prim_attr(split_string, split_tuple)
     self.mul = P.Mul().shard(strategy2)
     self.reshape = P.Reshape()
     self.matmul = P.MatMul().shard(strategy3)
     self.matmul.add_prim_attr("forward_reduce_scatter", True)
     if init_flag:
         self.param = Parameter(initializer("ones", param_shape,
                                            ms.float32),
                                name="gatherv2_param")
     else:
         self.param = Parameter(Tensor(np.ones(param_shape),
                                       dtype=ms.float32),
                                name="gatherv2_param")
     self.mul_weight = Parameter(initializer("ones", (8, 8, 8), ms.float32),
                                 name="mul_weight")
     self.matmul_weight = Parameter(initializer("ones", (64, 16),
                                                ms.float32),
                                    name="matmul_weight")
     self.axis = axis
Example #16
    def __init__(self,
                 is_training,
                 vocab_size,
                 embed_dim,
                 initializer_range=0.1,
                 use_one_hot_embeddings=False):

        super(EmbeddingLookup, self).__init__()
        self.is_training = is_training
        self.embedding_dim = embed_dim
        self.vocab_size = vocab_size
        self.use_one_hot_embeddings = use_one_hot_embeddings

        init_weight = np.random.normal(-initializer_range,
                                       initializer_range,
                                       size=[vocab_size, embed_dim])
        self.embedding_table = Parameter(Tensor(init_weight, mstype.float32),
                                         name='embedding_table')
        self.expand = P.ExpandDims()
        self.gather = P.GatherV2()
        self.one_hot = P.OneHot()
        self.on_value = Tensor(1.0, mstype.float32)
        self.off_value = Tensor(0.0, mstype.float32)
        self.array_mul = P.MatMul()
        self.reshape = P.Reshape()
        self.get_shape = P.Shape()
        self.cast = P.Cast()
Example #17
    def __init__(self, config):
        super(GetMaskedLMOutput, self).__init__()
        self.width = config.hidden_size
        self.reshape = P.Reshape()
        self.gather = P.GatherV2()

        weight_init = TruncatedNormal(config.initializer_range)
        self.dense = nn.Dense(self.width,
                              config.hidden_size,
                              weight_init=weight_init,
                              activation=config.hidden_act).to_float(config.compute_type)
        self.layernorm = nn.LayerNorm((config.hidden_size,)).to_float(config.compute_type)
        self.output_bias = Parameter(
            initializer(
                'zero',
                config.vocab_size),
            name='output_bias')
        self.matmul = P.MatMul(transpose_b=True)
        self.log_softmax = nn.LogSoftmax(axis=-1)
        self.shape_flat_offsets = (-1, 1)
        self.last_idx = (-1,)
        self.shape_flat_sequence_tensor = (-1, self.width)
        self.seq_length_tensor = Tensor(np.array((config.seq_length,)).astype(np.int32))
        self.cast = P.Cast()
        self.compute_type = config.compute_type
        self.dtype = config.dtype
Example #18
 def __init__(self,
              use_relative_positions,
              embedding_size,
              embedding_shape,
              use_token_type=False,
              token_type_vocab_size=16,
              use_one_hot_embeddings=False,
              initializer_range=0.02,
              max_position_embeddings=512,
              dropout_prob=0.1):
     super(EmbeddingPostprocessor, self).__init__()
     self.use_token_type = use_token_type
     self.token_type_vocab_size = token_type_vocab_size
     self.use_one_hot_embeddings = use_one_hot_embeddings
     self.max_position_embeddings = max_position_embeddings
     self.embedding_table = Parameter(
         initializer(TruncatedNormal(initializer_range),
                     [token_type_vocab_size, embedding_size]))
     self.shape_flat = (-1, )
     self.one_hot = P.OneHot()
     self.on_value = Tensor(1.0, mstype.float32)
     self.off_value = Tensor(0.1, mstype.float32)
     self.array_mul = P.MatMul()
     self.reshape = P.Reshape()
     self.shape = tuple(embedding_shape)
     self.layernorm = nn.LayerNorm((embedding_size, ))
     self.dropout = nn.Dropout(1 - dropout_prob)
     self.gather = P.GatherV2()
     self.use_relative_positions = use_relative_positions
     self.slice = P.StridedSlice()
     self.full_position_embeddings = Parameter(
         initializer(TruncatedNormal(initializer_range),
                     [max_position_embeddings, embedding_size]))
Example #19
 def __init__(self,
              optimizer,
              epsilon=1e-05,
              hyperpara=0.001,
              weight_decay=0.0,
              use_clip=False,
              decay_filter=lambda x: 'LayerNorm' not in x.name and 'bias'
              not in x.name,
              lars_filter=lambda x: 'LayerNorm' not in x.name and 'bias'
              not in x.name,
              loss_scale=1.0):
     super(LARS, self).__init__(0.0,
                                [Parameter(Tensor(0.0), name="trivial")])
     self.opt = optimizer
     self.parameters = optimizer.parameters
     self.learning_rate = optimizer.learning_rate
     self.lars = P.LARSUpdate(epsilon, hyperpara, use_clip)
     self.reciprocal_scale = 1.0 / loss_scale
     self.weight_decay = weight_decay * loss_scale
     self.cast = P.Cast()
     self.decay_flag = tuple(decay_filter(x) for x in self.parameters)
     self.lars_flag = tuple(lars_filter(x) for x in self.parameters)
     self.hyper_map = C.HyperMap()
     self.dynamic_lr = False
     self.gather = None
     self.global_step = None
     self.axis = None
     if isinstance(self.learning_rate.default_input, Iterable) or \
             (isinstance(self.learning_rate.default_input, Tensor) and self.learning_rate.default_input.dim() == 1):
         self.dynamic_lr = True
         self.assignadd = P.AssignAdd()
         self.gather = P.GatherV2()
         self.global_step = Parameter(initializer(0, [1], mstype.int32),
                                      name="lars_global_step")
         self.axis = 0
Example #20
    def __init__(self, vocab_size, embed_dim, use_one_hot_embeddings=False):
        """
        Embeddings lookup table with a fixed dictionary and size.

        Args:
            vocab_size (int): Size of the dictionary of embeddings.
            embed_dim (int): The size of word embedding.
            use_one_hot_embeddings (bool): Whether use one-hot embedding. Default: False.
        """
        super(EmbeddingLookup, self).__init__()
        self.embedding_dim = embed_dim
        self.vocab_size = vocab_size
        self.use_one_hot_embeddings = use_one_hot_embeddings

        init_weight = np.random.normal(0,
                                       embed_dim**-0.5,
                                       size=[vocab_size, embed_dim])
        # 0 is Padding index, thus init it as 0.
        init_weight[0, :] = 0
        self.embedding_table = Parameter(Tensor(init_weight),
                                         name='embedding_table')
        self.expand = P.ExpandDims()
        self.gather = P.GatherV2()
        self.one_hot = P.OneHot()
        self.on_value = Tensor(1.0, mstype.float32)
        self.off_value = Tensor(0.0, mstype.float32)
        self.array_mul = P.MatMul()
        self.reshape = P.Reshape()
        self.get_shape = P.Shape()
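A hedged usage sketch for this lookup layer (the id values are invented, the construct is not shown above, and the single-tensor return signature is an assumption; it reuses the np, Tensor and mstype names from the snippets in this listing):

# Hypothetical: id 0 is the padding index, whose embedding row was zero-initialized above.
lookup = EmbeddingLookup(vocab_size=32000, embed_dim=512)
ids = Tensor(np.array([[5, 17, 0, 0]]), mstype.int32)
embeddings = lookup(ids)  # expected shape: (1, 4, 512); rows for id 0 start as zeros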
Example #21
 def __init__(self,
              vocab_size,
              embedding_size,
              use_one_hot=False,
              embedding_table='normal',
              dtype=mstype.float32):
     super(Embedding, self).__init__()
     validator.check_subclass("dtype", dtype, mstype.number_type,
                              self.cls_name)
     validator.check_value_type('use_one_hot', use_one_hot, [bool],
                                self.cls_name)
     self.vocab_size = vocab_size
     self.embedding_size = embedding_size
     self.use_one_hot = use_one_hot
     self.embedding_table = Parameter(initializer(
         embedding_table, [vocab_size, embedding_size]),
                                      name='embedding_table')
     self.dtype = dtype
     self.expand = P.ExpandDims()
     self.reshape_flat = P.Reshape()
     self.shp_flat = (-1, )
     self.gather = P.GatherV2()
     self.one_hot = P.OneHot()
     self.on_value = Tensor(1.0, self.dtype)
     self.off_value = Tensor(0.0, self.dtype)
     self.array_mul = P.MatMul()
     self.reshape = P.Reshape()
     self.get_shp = P.Shape()
Example #22
 def __init__(self, axis=0, dyn_a=True, dyn_b=True):
     super(GatherNetDynamic, self).__init__()
     self.gather = P.GatherV2()
     self.gpu_convert_to_dynamic_shape = inner.GpuConvertToDynamicShape()
     self.to_dyn_1 = dyn_a
     self.to_dyn_2 = dyn_b
     self.axis = axis
Example #23
 def __init__(self):
     super(Pca, self).__init__()
     self.reduce_mean = ops.ReduceMean(keep_dims=True)
     self.reshape = ops.Reshape()
     self.matmul_a = ops.MatMul(transpose_a=True)
     self.matmul_b = ops.MatMul(transpose_b=True)
     self.top_k = ops.TopK(sorted=True)
     self.gather = ops.GatherV2()
Example #24
    def construct(self, input_ids, input_mask, label_ids):
        logits = self.gpt2(input_ids, input_mask)
        # Drop the logits of the last position and flatten to (-1, num_labels).
        shift_logits = logits[:, :-1, :]
        shift_logits = P.Reshape()(shift_logits, (-1, self.num_labels))
        # Select the label positions along axis 1 using the stored indices.
        label_ids = P.GatherV2()(label_ids, self.label_indices, 1)

        loss = self.loss(shift_logits, label_ids, self.num_labels)
        return P.Cast()(loss, mstype.float32)
Example #25
 def __init__(self, config):
     super(EmbeddingLookup, self).__init__()
     self.vocab_size = config.vocab_size
     self.embedding_size = config.embedding_size
     self.embedding_table = Parameter(initializer(TruncatedNormal(0.02), [self.vocab_size, self.embedding_size]),
                                      name="embedding_table")
     self.gather = P.GatherV2()
     self.shape = (-1, config.seq_length, config.embedding_size)
Example #26
 def __init__(self, axis=0, shape=None):
     super(Net, self).__init__()
     if shape is None:
         shape = [8, 8]
     self.mirror = _MirrorOperator(group=HCCL_WORLD_COMM_GROUP)
     self.gatherv2 = P.GatherV2()
     self.index = Tensor(np.ones(shape), dtype=ms.int32)
     self.axis = axis
Example #27
 def __init__(self, axis=0, strategy1=None, strategy2=None, shape=None, target=""):
     super().__init__()
     if shape is None:
         shape = [64, 64]
     self.gatherv2 = P.GatherV2().set_strategy(strategy1).add_prim_attr("primitive_target", target)
     self.mul = P.Mul().set_strategy(strategy2)
     self.index = Tensor(np.ones(shape), dtype=ms.int32)
     self.axis = axis
Example #28
 def __init__(self, matmul_weight, strategy1=None):
     super().__init__()
     self.gatherv2 = P.GatherV2().set_strategy(strategy1)
     self.reshape = P.Reshape().add_prim_attr("skip_redistribution", True)
     self.matmul = P.MatMul(transpose_b=False)
     self.index = Tensor(np.ones([64, 64]), dtype=ms.int32)
     self.matmul_weight = Parameter(matmul_weight, "w1")
     self.axis = 0
Example #29
 def __init__(self, axis=0, shape=None):
     super(Net, self).__init__()
     if shape is None:
         shape = [8, 8]
     self.all_reduce = AllReduce()
     self.gatherv2 = P.GatherV2()
     self.index = Tensor(np.ones(shape), dtype=ms.int32)
     self.axis = axis
Example #30
 def __init__(self, axis=0, strategy1=None, strategy2=None, shape=None):
     super().__init__()
     if shape is None:
         shape = [64, 64]
     self.gatherv2 = P.GatherV2().set_strategy(strategy1)
     self.mul = P.Mul().set_strategy(strategy2)
     self.index = Tensor(np.ones(shape), dtype=ms.int32)
     self.axis = axis