def __init__(self, config): """ The embedding lookup table for vocabulary Args: config(PANGUALPHAConfig): the config of network Inputs: input_ids: the tokenized inputs with datatype int32 Returns: output: Tensor, the embedding vector for the input with shape (batch_size, seq_length, embedding_size) self.embedding_table: Tensor, the embedding table for the vocabulary """ super(EmbeddingLookup, self).__init__() self.vocab_size = config.vocab_size self.embedding_size = config.embedding_size if config.load_ckpt_path: # Loading the embedding table from the ckpt path: embedding_path = os.path.join(config.load_ckpt_path, 'word_embedding.npy') if os.path.exists(embedding_path): e_table = np.load(embedding_path) e_table = Tensor(e_table, mstype.float32) self.embedding_table = Parameter(e_table, name="embedding_table") else: raise ValueError(f"{embedding_path} file not exits, please check whether word_embedding file exist.") else: self.embedding_table = Parameter(initializer( Normal(0.02), [self.vocab_size, self.embedding_size]), name="embedding_table") if config.word_emb_dp: self.gather = P.GatherV2().shard(((1, 1), (config.dp, 1))) else: self.gather = P.GatherV2().shard(((config.mp, 1), (1, 1))) self.gather.add_prim_attr("repeated_calc_num_direction", "left") if config.forward_reduce_scatter: self.gather.add_prim_attr("forward_type", "ReduceScatter") self.shape = (-1, config.seq_length, config.embedding_size)
def __init__(self, config):
    super(WideDeepModel, self).__init__()
    self.batch_size = config.batch_size
    parallel_mode = _get_parallel_mode()
    if parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL):
        self.batch_size = self.batch_size * get_group_size()
    self.field_size = config.field_size
    self.vocab_size = config.vocab_size
    self.emb_dim = config.emb_dim
    self.deep_layer_dims_list = config.deep_layer_dim
    self.deep_layer_act = config.deep_layer_act
    self.init_args = config.init_args
    self.weight_init, self.bias_init = config.weight_bias_init
    self.weight_bias_init = config.weight_bias_init
    self.emb_init = config.emb_init
    self.drop_out = config.dropout_flag
    self.keep_prob = config.keep_prob
    self.deep_input_dims = self.field_size * self.emb_dim
    self.layer_dims = self.deep_layer_dims_list + [1]
    self.all_dim_list = [self.deep_input_dims] + self.layer_dims
    init_acts = [('Wide_w', [self.vocab_size, 1], self.emb_init),
                 ('V_l2', [self.vocab_size, self.emb_dim], self.emb_init),
                 ('Wide_b', [1], self.emb_init)]
    var_map = init_var_dict(self.init_args, init_acts)
    self.wide_w = var_map["Wide_w"]
    self.wide_b = var_map["Wide_b"]
    self.embedding_table = var_map["V_l2"]
    self.dense_layer_1 = DenseLayer(self.all_dim_list[0], self.all_dim_list[1],
                                    self.weight_bias_init, self.deep_layer_act,
                                    convert_dtype=True)
    self.dense_layer_2 = DenseLayer(self.all_dim_list[1], self.all_dim_list[2],
                                    self.weight_bias_init, self.deep_layer_act,
                                    convert_dtype=True)
    self.dense_layer_3 = DenseLayer(self.all_dim_list[2], self.all_dim_list[3],
                                    self.weight_bias_init, self.deep_layer_act,
                                    convert_dtype=True)
    self.dense_layer_4 = DenseLayer(self.all_dim_list[3], self.all_dim_list[4],
                                    self.weight_bias_init, self.deep_layer_act,
                                    convert_dtype=True)
    self.dense_layer_5 = DenseLayer(self.all_dim_list[4], self.all_dim_list[5],
                                    self.weight_bias_init, self.deep_layer_act,
                                    convert_dtype=True)
    self.gather_v2 = P.GatherV2().set_strategy(((1, 8), (1, 1)))
    self.gather_v2_1 = P.GatherV2()
    self.mul = P.Mul()
    self.reduce_sum = P.ReduceSum(keep_dims=False)
    self.reshape = P.Reshape()
    self.square = P.Square()
    self.shape = P.Shape()
    self.tile = P.Tile()
    self.concat = P.Concat(axis=1)
    self.cast = P.Cast()
def __init__(self,
             vocab_size,
             embedding_size,
             embedding_shape,
             use_one_hot_embeddings=False,
             initializer_range=0.02,
             batch_size=12,
             damping=0.03,
             loss_scale=1,
             frequency=100):
    super(Embedding_Thor, self).__init__()
    self.vocab_size = vocab_size
    self.use_one_hot_embeddings = use_one_hot_embeddings
    self.embedding_table = Parameter(initializer(TruncatedNormal(initializer_range),
                                                 [vocab_size, embedding_size]))
    self.thor = True
    self.expand = P.ExpandDims()
    self.shape_flat = (-1,)
    self.gather = P.GatherV2()
    self.one_hot = P.OneHot()
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.0, mstype.float32)
    self.array_mul = P.MatMul()
    self.reshape = P.Reshape()
    self.em_shape = tuple(embedding_shape)
    self.shape = P.Shape()
    self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
    self.matrix_A_inv = Parameter(Tensor(np.zeros([vocab_size]).astype(np.float16)),
                                  requires_grad=False)
    self.matrix_G_inv = Parameter(Tensor(np.zeros([embedding_size, embedding_size]).astype(np.float16)),
                                  requires_grad=False)
    self.fake_G = Tensor(np.zeros([embedding_size, embedding_size]).astype(np.float16))
    self.dampingA = Tensor(np.ones([vocab_size]).astype(np.float32))
    self.dampingG = Tensor(np.identity(embedding_size), mstype.float32)
    self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
    self.freq = Tensor(frequency, mstype.int32)
    self.axis = 0
    self.damping = damping
    self.gather = P.GatherV2()
    self.sqrt = P.Sqrt()
    self.mul = P.Mul()
    self.cast = P.Cast()
    self.cube_matmul = P.CusMatMulCube(transpose_a=True)
    self.vector_matmul = P.CusBatchMatMul()
    self.cholesky = P.CusCholeskyTrsm()
    self.matrix_combine = P.CusMatrixCombine()
    self.reduce_sum = P.ReduceSum(keep_dims=False)
    self.inv = P.Inv()
    self.getG = P.InsertGradientOf(self.save_gradient)
    self.batch_size = batch_size
def __init__(self, config):
    super(EmbeddingLookupPipeline, self).__init__()
    self.vocab_size = config.vocab_size
    self.embedding_size = config.embedding_size
    if config.word_emb_dp:
        self.gather = P.GatherV2().shard(((1, 1), (config.dp, 1)))
    else:
        self.gather = P.GatherV2().shard(((config.mp, 1), (1, 1)))
        self.gather.add_prim_attr("repeated_calc_num_direction", "left")
        if config.forward_reduce_scatter:
            self.gather.add_prim_attr("forward_type", "ReduceScatter")
    self.gather.add_prim_attr("begin", 0)
    self.shape = (-1, config.seq_length, config.embedding_size)
def __init__(self):
    super().__init__()
    self.unique = P.Unique().shard(((1,),))
    self.relu = P.ReLU()
    self.mul = P.Mul()
    self.embedding_lookp = P.GatherV2().shard(((8, 1), (1,)))
    self.embedding_table = Parameter(initializer('normal', [2000, 128]),
                                     name='embedding_table')
    self.gatherv2 = P.GatherV2().shard(((1, 1), (8,)))
    self.reshape = P.Reshape()
    self.matmul = P.MatMul()
    self.mul_weight = Parameter(Tensor(np.full([32, 64, 1], 0.5, dtype=np.float32)),
                                name="mul_weight")
def __init__(self, num_sampled, num_classes, num_true=1,
             sampled_values=None, remove_accidental_hits=True, seed=0,
             reduction='none'):
    super(SampledSoftmaxLoss, self).__init__()
    self.num_sampled = num_sampled
    self.num_classes = num_classes
    self.num_true = num_true
    self.sampled_values = sampled_values
    self.remove_accidental_hits = remove_accidental_hits
    self.seed = seed
    self.sampler = P.UniformSampler(
        num_true,
        num_sampled,
        True,
        num_classes,
        seed,
        remove_accidental_hits)
    self.cast = P.Cast()
    self.reshape = P.Reshape()
    self.shape = P.Shape()
    self.exp = P.Exp()
    self.log = P.Log()
    self.slice_op = P.Slice()
    self.matmul = P.MatMul(False, True)
    self.gather_v2 = P.GatherV2()
    self.reduce_max_true = P.ReduceMax(True)
    self.reduce_sum = P.ReduceSum()
    self.reduce_sum_true = P.ReduceSum(True)
    self.concat_dim0 = P.Concat(0)
    self.concat_dim1 = P.Concat(1)
    self.ones_like = P.OnesLike()
    self.zeros_like = P.ZerosLike()
    self.mul = P.Mul()
    self.expand_dims = P.ExpandDims()
def __init__(self,
             in_channels,
             out_channels,
             weight_init='normal',
             bias_init='zeros',
             damping=0.03,
             loss_scale=1,
             frequency=278,
             batch_size=32,
             has_bias=True,
             activation=None):
    super(Dense_Thor_GPU, self).__init__()
    self.in_channels = Validator.check_positive_int(in_channels)
    self.out_channels = Validator.check_positive_int(out_channels)
    self.has_bias = Validator.check_bool(has_bias)
    self.thor = True
    if isinstance(weight_init, Tensor):
        if weight_init.ndim != 2 or weight_init.shape[0] != out_channels or \
                weight_init.shape[1] != in_channels:
            raise ValueError("weight_init shape error")
    self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]))
    if self.has_bias:
        if isinstance(bias_init, Tensor):
            if bias_init.ndim != 1 or bias_init.shape[0] != out_channels:
                raise ValueError("bias_init shape error")
        self.bias = Parameter(initializer(bias_init, [out_channels]))
    self.matmul = P.MatMul(transpose_b=True)
    self.bias_add = P.BiasAdd()
    self.activation = get_activation(activation)
    self.activation_flag = self.activation is not None
    split_dim = 128
    matrix_A_shape, matrix_G_shape = caculate_matmul_shape(self.in_channels, self.out_channels, split_dim)
    self.matrix_A_inv = Parameter(Tensor(np.zeros(matrix_A_shape).astype(np.float32)), requires_grad=False)
    self.matrix_G_inv = Parameter(Tensor(np.zeros(matrix_G_shape).astype(np.float32)), requires_grad=False)
    self.broadcast_to = P.BroadcastTo(matrix_A_shape)
    self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
    self.shape = P.Shape()
    self.reshape = P.Reshape()
    self.transpose = P.Transpose()
    self.mul = P.Mul()
    self.cube_matmul = P.MatMul(transpose_a=True)
    self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
    self.batch_size = Tensor(batch_size, mstype.float16)
    self.getG = P.InsertGradientOf(self.save_gradient)
    self.damping = Parameter(Tensor(damping), requires_grad=False)
    self.dampingA = Tensor(np.identity(in_channels), mstype.float32)
    self.dampingG = Tensor(np.identity(out_channels), mstype.float32)
    self.cast = P.Cast()
    self.gather = P.GatherV2()
    self.freq = Tensor(frequency, mstype.int32)
    self.axis = 0
    self.add = P.TensorAdd()
    self.sqrt = P.Sqrt()
    self.cholesky = P.CholeskyTrsm(split_dim=split_dim)
    self.vector_matmul = P.BatchMatMul(transpose_a=True)
def __init__(self, vocab_size, embedding_size, use_one_hot=False, embedding_table='normal',
             dtype=mstype.float32, padding_idx=None):
    super(Embedding, self).__init__()
    self.vocab_size = validator.check_value_type('vocab_size', vocab_size, [int], self.cls_name)
    self.embedding_size = validator.check_value_type('embedding_size', embedding_size, [int], self.cls_name)
    validator.check_value_type('use_one_hot', use_one_hot, [bool], self.cls_name)
    validator.check_subclass("dtype", dtype, mstype.number_type, self.cls_name)
    self.use_one_hot = use_one_hot
    self.dtype = dtype
    self.init_tensor = initializer(embedding_table, [vocab_size, embedding_size])
    self.padding_idx = padding_idx
    if padding_idx is not None:
        self.padding_idx = validator.check_int_range(padding_idx, 0, vocab_size, Rel.INC_BOTH,
                                                     "padding_idx", self.cls_name)
        if isinstance(self.init_tensor, Tensor) and self.init_tensor.init is not None:
            self.init_tensor = self.init_tensor.init_data()
        self.init_tensor = self.init_tensor.asnumpy()
        self.init_tensor[self.padding_idx] = 0
        self.init_tensor = Tensor(self.init_tensor)
    self.embedding_table = Parameter(self.init_tensor, name='embedding_table')
    self.expand = P.ExpandDims()
    self.reshape_flat = P.Reshape()
    self.shp_flat = (-1,)
    self.gather = P.GatherV2()
    self.one_hot = P.OneHot()
    self.on_value = Tensor(1.0, self.dtype)
    self.off_value = Tensor(0.0, self.dtype)
    self.array_mul = P.MatMul()
    self.reshape = P.Reshape()
    self.get_shp = P.Shape()
def __init__(self, batch_size=4):
    super(DiceLoss, self).__init__()
    self.threshold0 = Tensor(0.5, mstype.float32)
    self.zero_float32 = Tensor(0.0, mstype.float32)
    self.k = int(640 * 640)
    self.negative_one_int32 = Tensor(-1, mstype.int32)
    self.batch_size = batch_size
    self.concat = P.Concat()
    self.less_equal = P.LessEqual()
    self.greater = P.Greater()
    self.reduce_sum = P.ReduceSum()
    self.reduce_sum_keep_dims = P.ReduceSum(keep_dims=True)
    self.reduce_mean = P.ReduceMean()
    self.reduce_min = P.ReduceMin()
    self.cast = P.Cast()
    self.minimum = P.Minimum()
    self.expand_dims = P.ExpandDims()
    self.select = P.Select()
    self.fill = P.Fill()
    self.topk = P.TopK(sorted=True)
    self.shape = P.Shape()
    self.sigmoid = P.Sigmoid()
    self.reshape = P.Reshape()
    self.slice = P.Slice()
    self.logical_and = P.LogicalAnd()
    self.logical_or = P.LogicalOr()
    self.equal = P.Equal()
    self.zeros_like = P.ZerosLike()
    self.add = P.TensorAdd()
    self.gather = P.GatherV2()
def __init__(self, config):
    super(GetMaskedLMOutput, self).__init__()
    self.width = config.hidden_size
    self.reshape = P.Reshape()
    self.gather = P.GatherV2()
    weight_init = TruncatedNormal(config.initializer_range)
    self.dense = Dense_Thor(in_channels=self.width,
                            out_channels=config.hidden_size,
                            weight_init=weight_init,
                            has_bias=True,
                            bias_init='zeros',
                            damping=damping,
                            loss_scale=loss_scale,
                            frequency=frequency,
                            activation=config.hidden_act,
                            batch_size=batch_size).to_float(config.compute_type)
    self.layernorm = nn.LayerNorm((config.hidden_size,)).to_float(config.compute_type)
    self.output_bias = Parameter(initializer('zero', config.vocab_size),
                                 name='output_bias')
    self.matmul = P.MatMul(transpose_b=True)
    self.log_softmax = nn.LogSoftmax(axis=-1)
    self.shape_flat_offsets = (-1, 1)
    self.rng = Tensor(np.array(range(0, config.batch_size)).astype(np.int32))
    self.last_idx = (-1,)
    self.shape_flat_sequence_tensor = (config.batch_size * config.seq_length, self.width)
    self.seq_length_tensor = Tensor(np.array((config.seq_length,)).astype(np.int32))
    self.cast = P.Cast()
    self.compute_type = config.compute_type
    self.dtype = config.dtype
def __init__(self, params, learning_rate, momentum, weight_decay=0.0, loss_scale=1.0,
             decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in x.name):
    super(Momentum, self).__init__(learning_rate, params)
    if isinstance(momentum, float) and momentum < 0.0:
        raise ValueError("momentum should be at least 0.0, but got momentum {}".format(momentum))
    if isinstance(learning_rate, Iterable) or \
            (isinstance(learning_rate, Tensor) and learning_rate.dim() == 1):
        self.dynamic_lr = True
        self.gather = P.GatherV2()
        self.assignadd = P.AssignAdd()
        self.global_step = Parameter(initializer(0, [1], mstype.int32), name="global_step")
        self.axis = 0
    else:
        self.dynamic_lr = False
        self.gather = None
        self.assignadd = None
        self.global_step = None
        self.axis = None
    self.momentum = Parameter(momentum, name="momentum")
    self.params = self.parameters
    self.moments = self.params.clone(prefix="moments", init='zeros')
    self.decay_tf = tuple(decay_filter(x) for x in self.parameters)
    self.hyper_map = C.HyperMap()
    self.opt = P.ApplyMomentum()
    self.weight_decay = weight_decay * loss_scale
    self.reciprocal_scale = 1.0 / loss_scale
    self.one = Tensor(1, mstype.int32)
def __init__(self,
             length,
             depth,
             max_relative_position,
             initializer_range,
             use_one_hot_embeddings=False):
    super(RelaPosEmbeddingsGenerator, self).__init__()
    self.depth = depth
    self.vocab_size = max_relative_position * 2 + 1
    self.use_one_hot_embeddings = use_one_hot_embeddings
    self.embeddings_table = Parameter(
        initializer(TruncatedNormal(initializer_range),
                    [self.vocab_size, self.depth]),
        name='embeddings_for_position')
    self.relative_positions_matrix = RelaPosMatrixGenerator(length=length,
                                                            max_relative_position=max_relative_position)
    self.reshape = P.Reshape()
    self.one_hot = P.OneHot()
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.0, mstype.float32)
    self.shape = P.Shape()
    self.gather = P.GatherV2()  # index_select
    self.matmul = P.BatchMatMul()
def __init__(self, params, learning_rate, momentum, matrix_A, matrix_G, weight_decay=0.0,
             loss_scale=1.0, num_hidden_layers=24, batch_size=12, damping=0.03,
             decay_filter=lambda x: 'layernorm' not in x.name.lower() and 'bias' not in x.name.lower()):
    super(THOR, self).__init__(learning_rate, params, weight_decay, loss_scale)
    if isinstance(momentum, float) and momentum < 0.0:
        raise ValueError("momentum should be at least 0.0, but got momentum {}".format(momentum))
    self.momentum = Parameter(Tensor(momentum, mstype.float32), name="momentum")
    self.params = self.parameters
    self.moments = self.params.clone(prefix="moments", init='zeros')
    self.hyper_map = C.HyperMap()
    self.opt = P.ApplyMomentum()
    self.matrix_A = ParameterTuple(matrix_A)
    self.matrix_G = ParameterTuple(matrix_G)
    self.matmul = P.MatMul()
    self.transpose = P.Transpose()
    self.shape = P.Shape()
    self.reshape = P.Reshape()
    self.mul = P.Mul()
    self.gather = P.GatherV2()
    self.matrix_A_inv = ()
    self.matrix_G_inv = ()
    self.num_hidden_layers = num_hidden_layers
    self.sqrt = P.Sqrt()
    self.assign = P.Assign()
    self.cast = P.Cast()
    self.thor = True
    self.weight_decay = weight_decay * loss_scale
    self.decay_flags = tuple(decay_filter(x) for x in self.parameters)
    self.expand = P.ExpandDims()
    self.square = P.Square()
    self.inv = P.Inv()
    self.batch_size = batch_size
    self.damping = damping
    self.one = Tensor(1, mstype.int32)
    self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step",
                              requires_grad=False)
def __init__(self):
    super(InsertGradientNet, self).__init__()
    self.gather = P.GatherV2()
    self.damping = Tensor(np.array([0.03, 0.03], np.float32))
    self.cov_step = Parameter(0, name="cov_step", requires_grad=False)
    self.freq = Tensor(278, ms.int32)
    self.getG = P.InsertGradientOf(self.save_gradient)
def __init__(self, strategy1=None, strategy2=None, strategy3=None, axis=0, init_flag=True,
             split_tuple=(4, 4), split_string="manual_split", param_shape=(8, 8)):
    super().__init__()
    self.gatherv2 = P.GatherV2().shard(strategy1)
    self.gatherv2.add_prim_attr(split_string, split_tuple)
    self.mul = P.Mul().shard(strategy2)
    self.reshape = P.Reshape()
    self.matmul = P.MatMul().shard(strategy3)
    self.matmul.add_prim_attr("forward_reduce_scatter", True)
    if init_flag:
        self.param = Parameter(initializer("ones", param_shape, ms.float32), name="gatherv2_param")
    else:
        self.param = Parameter(Tensor(np.ones(param_shape), dtype=ms.float32), name="gatherv2_param")
    self.mul_weight = Parameter(initializer("ones", (8, 8, 8), ms.float32), name="mul_weight")
    self.matmul_weight = Parameter(initializer("ones", (64, 16), ms.float32), name="matmul_weight")
    self.axis = axis
def __init__(self,
             is_training,
             vocab_size,
             embed_dim,
             initializer_range=0.1,
             use_one_hot_embeddings=False):
    super(EmbeddingLookup, self).__init__()
    self.is_training = is_training
    self.embedding_dim = embed_dim
    self.vocab_size = vocab_size
    self.use_one_hot_embeddings = use_one_hot_embeddings
    init_weight = np.random.normal(-initializer_range, initializer_range,
                                   size=[vocab_size, embed_dim])
    self.embedding_table = Parameter(Tensor(init_weight, mstype.float32), name='embedding_table')
    self.expand = P.ExpandDims()
    self.gather = P.GatherV2()
    self.one_hot = P.OneHot()
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.0, mstype.float32)
    self.array_mul = P.MatMul()
    self.reshape = P.Reshape()
    self.get_shape = P.Shape()
    self.cast = P.Cast()
def __init__(self, config):
    super(GetMaskedLMOutput, self).__init__()
    self.width = config.hidden_size
    self.reshape = P.Reshape()
    self.gather = P.GatherV2()
    weight_init = TruncatedNormal(config.initializer_range)
    self.dense = nn.Dense(self.width,
                          config.hidden_size,
                          weight_init=weight_init,
                          activation=config.hidden_act).to_float(config.compute_type)
    self.layernorm = nn.LayerNorm((config.hidden_size,)).to_float(config.compute_type)
    self.output_bias = Parameter(initializer('zero', config.vocab_size),
                                 name='output_bias')
    self.matmul = P.MatMul(transpose_b=True)
    self.log_softmax = nn.LogSoftmax(axis=-1)
    self.shape_flat_offsets = (-1, 1)
    self.last_idx = (-1,)
    self.shape_flat_sequence_tensor = (-1, self.width)
    self.seq_length_tensor = Tensor(np.array((config.seq_length,)).astype(np.int32))
    self.cast = P.Cast()
    self.compute_type = config.compute_type
    self.dtype = config.dtype
def __init__(self,
             use_relative_positions,
             embedding_size,
             embedding_shape,
             use_token_type=False,
             token_type_vocab_size=16,
             use_one_hot_embeddings=False,
             initializer_range=0.02,
             max_position_embeddings=512,
             dropout_prob=0.1):
    super(EmbeddingPostprocessor, self).__init__()
    self.use_token_type = use_token_type
    self.token_type_vocab_size = token_type_vocab_size
    self.use_one_hot_embeddings = use_one_hot_embeddings
    self.max_position_embeddings = max_position_embeddings
    self.embedding_table = Parameter(initializer(TruncatedNormal(initializer_range),
                                                 [token_type_vocab_size, embedding_size]))
    self.shape_flat = (-1,)
    self.one_hot = P.OneHot()
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.1, mstype.float32)
    self.array_mul = P.MatMul()
    self.reshape = P.Reshape()
    self.shape = tuple(embedding_shape)
    self.layernorm = nn.LayerNorm((embedding_size,))
    self.dropout = nn.Dropout(1 - dropout_prob)
    self.gather = P.GatherV2()
    self.use_relative_positions = use_relative_positions
    self.slice = P.StridedSlice()
    self.full_position_embeddings = Parameter(initializer(TruncatedNormal(initializer_range),
                                                          [max_position_embeddings, embedding_size]))
def __init__(self, optimizer, epsilon=1e-05, hyperpara=0.001, weight_decay=0.0, use_clip=False,
             decay_filter=lambda x: 'LayerNorm' not in x.name and 'bias' not in x.name,
             lars_filter=lambda x: 'LayerNorm' not in x.name and 'bias' not in x.name,
             loss_scale=1.0):
    super(LARS, self).__init__(0.0, [Parameter(Tensor(0.0), name="trivial")])
    self.opt = optimizer
    self.parameters = optimizer.parameters
    self.learning_rate = optimizer.learning_rate
    self.lars = P.LARSUpdate(epsilon, hyperpara, use_clip)
    self.reciprocal_scale = 1.0 / loss_scale
    self.weight_decay = weight_decay * loss_scale
    self.cast = P.Cast()
    self.decay_flag = tuple(decay_filter(x) for x in self.parameters)
    self.lars_flag = tuple(lars_filter(x) for x in self.parameters)
    self.hyper_map = C.HyperMap()
    self.dynamic_lr = False
    self.gather = None
    self.global_step = None
    self.axis = None
    if isinstance(self.learning_rate.default_input, Iterable) or \
            (isinstance(self.learning_rate.default_input, Tensor) and
             self.learning_rate.default_input.dim() == 1):
        self.dynamic_lr = True
        self.assignadd = P.AssignAdd()
        self.gather = P.GatherV2()
        self.global_step = Parameter(initializer(0, [1], mstype.int32), name="lars_global_step")
        self.axis = 0
def __init__(self, vocab_size, embed_dim, use_one_hot_embeddings=False): """ Embeddings lookup table with a fixed dictionary and size. Args: vocab_size (int): Size of the dictionary of embeddings. embed_dim (int): The size of word embedding. use_one_hot_embeddings (bool): Whether use one-hot embedding. Default: False. """ super(EmbeddingLookup, self).__init__() self.embedding_dim = embed_dim self.vocab_size = vocab_size self.use_one_hot_embeddings = use_one_hot_embeddings init_weight = np.random.normal(0, embed_dim**-0.5, size=[vocab_size, embed_dim]) # 0 is Padding index, thus init it as 0. init_weight[0, :] = 0 self.embedding_table = Parameter(Tensor(init_weight), name='embedding_table') self.expand = P.ExpandDims() self.gather = P.GatherV2() self.one_hot = P.OneHot() self.on_value = Tensor(1.0, mstype.float32) self.off_value = Tensor(0.0, mstype.float32) self.array_mul = P.MatMul() self.reshape = P.Reshape() self.get_shape = P.Shape()
def __init__(self, vocab_size, embedding_size, use_one_hot=False,
             embedding_table='normal', dtype=mstype.float32):
    super(Embedding, self).__init__()
    validator.check_subclass("dtype", dtype, mstype.number_type, self.cls_name)
    validator.check_value_type('use_one_hot', use_one_hot, [bool], self.cls_name)
    self.vocab_size = vocab_size
    self.embedding_size = embedding_size
    self.use_one_hot = use_one_hot
    self.embedding_table = Parameter(initializer(embedding_table, [vocab_size, embedding_size]),
                                     name='embedding_table')
    self.dtype = dtype
    self.expand = P.ExpandDims()
    self.reshape_flat = P.Reshape()
    self.shp_flat = (-1,)
    self.gather = P.GatherV2()
    self.one_hot = P.OneHot()
    self.on_value = Tensor(1.0, self.dtype)
    self.off_value = Tensor(0.0, self.dtype)
    self.array_mul = P.MatMul()
    self.reshape = P.Reshape()
    self.get_shp = P.Shape()
def __init__(self, axis=0, dyn_a=True, dyn_b=True):
    super(GatherNetDynamic, self).__init__()
    self.gather = P.GatherV2()
    self.gpu_convert_to_dynamic_shape = inner.GpuConvertToDynamicShape()
    self.to_dyn_1 = dyn_a
    self.to_dyn_2 = dyn_b
    self.axis = axis
def __init__(self):
    super(Pca, self).__init__()
    self.reduce_mean = ops.ReduceMean(keep_dims=True)
    self.reshape = ops.Reshape()
    self.matmul_a = ops.MatMul(transpose_a=True)
    self.matmul_b = ops.MatMul(transpose_b=True)
    self.top_k = ops.TopK(sorted=True)
    self.gather = ops.GatherV2()
def construct(self, input_ids, input_mask, label_ids):
    logits = self.gpt2(input_ids, input_mask)
    shift_logits = logits[:, :-1, :]
    shift_logits = P.Reshape()(shift_logits, (-1, self.num_labels))
    label_ids = P.GatherV2()(label_ids, self.label_indices, 1)
    loss = self.loss(shift_logits, label_ids, self.num_labels)
    return P.Cast()(loss, mstype.float32)
def __init__(self, config):
    super(EmbeddingLookup, self).__init__()
    self.vocab_size = config.vocab_size
    self.embedding_size = config.embedding_size
    self.embedding_table = Parameter(initializer(TruncatedNormal(0.02),
                                                 [self.vocab_size, self.embedding_size]),
                                     name="embedding_table")
    self.gather = P.GatherV2()
    self.shape = (-1, config.seq_length, config.embedding_size)
def __init__(self, axis=0, shape=None):
    super(Net, self).__init__()
    if shape is None:
        shape = [8, 8]
    self.mirror = _MirrorOperator(group=HCCL_WORLD_COMM_GROUP)
    self.gatherv2 = P.GatherV2()
    self.index = Tensor(np.ones(shape), dtype=ms.int32)
    self.axis = axis
def __init__(self, axis=0, strategy1=None, strategy2=None, shape=None, target=""): super().__init__() if shape is None: shape = [64, 64] self.gatherv2 = P.GatherV2().set_strategy(strategy1).add_prim_attr("primitive_target", target) self.mul = P.Mul().set_strategy(strategy2) self.index = Tensor(np.ones(shape), dtype=ms.int32) self.axis = axis
def __init__(self, matmul_weight, strategy1=None):
    super().__init__()
    self.gatherv2 = P.GatherV2().set_strategy(strategy1)
    self.reshape = P.Reshape().add_prim_attr("skip_redistribution", True)
    self.matmul = P.MatMul(transpose_b=False)
    self.index = Tensor(np.ones([64, 64]), dtype=ms.int32)
    self.matmul_weight = Parameter(matmul_weight, "w1")
    self.axis = 0
def __init__(self, axis=0, shape=None):
    super(Net, self).__init__()
    if shape is None:
        shape = [8, 8]
    self.all_reduce = AllReduce()
    self.gatherv2 = P.GatherV2()
    self.index = Tensor(np.ones(shape), dtype=ms.int32)
    self.axis = axis
def __init__(self, axis=0, strategy1=None, strategy2=None, shape=None):
    super().__init__()
    if shape is None:
        shape = [64, 64]
    self.gatherv2 = P.GatherV2().set_strategy(strategy1)
    self.mul = P.Mul().set_strategy(strategy2)
    self.index = Tensor(np.ones(shape), dtype=ms.int32)
    self.axis = axis