def __init__(self, vocab_size, embedding_dims, num_class):
    """Create the embedding table, the classifier layer and the helper operators.

    Args:
        vocab_size: Vocabulary size handed to ``nn.Embedding``.
        embedding_dims: Embedding width; also the input width of the dense layer.
        num_class: Number of output channels of the dense layer.
    """
    super(FastText, self).__init__()
    self.vocab_size = vocab_size
    self.embeding_dims = embedding_dims
    self.num_class = num_class

    # Embedding table: index 0 is the padding index, initial values are 'Zeros'.
    self.embeding_func = nn.Embedding(
        vocab_size=self.vocab_size,
        embedding_size=self.embeding_dims,
        padding_idx=0,
        embedding_table='Zeros',
    )

    # Classifier layer, switched to float16 computation.
    classifier = nn.Dense(
        self.embeding_dims,
        out_channels=self.num_class,
        weight_init=XavierUniform(1),
    )
    self.fc = classifier.to_float(mstype.float16)

    # Stateless operators.
    self.reducesum = P.ReduceSum()
    self.expand_dims = P.ExpandDims()
    self.squeeze = P.Squeeze(axis=1)
    self.cast = P.Cast()
    self.tile = P.Tile()
    self.realdiv = P.RealDiv()
    self.fill = P.Fill()
    self.log_softmax = nn.LogSoftmax(axis=1)
def __init__(self, vocab_size, embedding_size, embedding_shape,
             use_one_hot_embeddings=False, initializer_range=0.02):
    """Create the embedding table parameter and the lookup operators.

    Args:
        vocab_size: Number of rows of the embedding table.
        embedding_size: Number of columns of the embedding table.
        embedding_shape: Iterable stored as the tuple ``self.shape``.
        use_one_hot_embeddings: Stored flag; not interpreted in this constructor.
        initializer_range: Argument forwarded to ``TruncatedNormal``.
    """
    super(EmbeddingLookup, self).__init__()
    self.vocab_size = vocab_size
    self.use_one_hot_embeddings = use_one_hot_embeddings

    table_init = initializer(TruncatedNormal(initializer_range),
                             [vocab_size, embedding_size])
    self.embedding_table = Parameter(table_init)

    # Constant shapes and one-hot fill values.
    self.shape_flat = (-1,)
    self.shape = tuple(embedding_shape)
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.0, mstype.float32)

    # Operators.
    self.expand = P.ExpandDims()
    self.gather = P.Gather()
    self.one_hot = P.OneHot()
    self.array_mul = P.MatMul()
    self.reshape = P.Reshape()
def __init__(self, vocab_size, embedding_dim, use_one_hot_embeddings=False):
    """Create the embedding table parameter and the lookup operators.

    Args:
        vocab_size: Number of rows of the embedding table.
        embedding_dim: Number of columns of the embedding table; also passed
            as second argument to ``normal_weight``.
        use_one_hot_embeddings: Stored flag; not interpreted in this constructor.
    """
    super(EmbeddingLookup, self).__init__()
    self.vocab_size = vocab_size
    self.embedding_dim = embedding_dim
    self.use_one_hot_embeddings = use_one_hot_embeddings

    initial_table = normal_weight([vocab_size, embedding_dim], embedding_dim)
    self.embedding_table = Parameter(initial_table, name='embedding_table')

    # Constant shape and one-hot fill values.
    self.shape_flat = (-1, )
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.0, mstype.float32)

    # Operators.
    self.expand = P.ExpandDims()
    # axis=1 gathers along columns, axis=0 gathers along rows (like index_select)
    self.gather = P.GatherV2()
    self.one_hot = P.OneHot()
    self.array_mul = P.MatMul()
    self.reshape = P.Reshape()
    self.shape = P.Shape()
def __init__(self, kernel_size=1, stride=1, pad_mode="valid"):
    """Validate the pooling arguments and build the 1-D average pooling operators.

    Args:
        kernel_size: Int, checked to be >= 1.
        stride: Int, checked to be >= 1.
        pad_mode: String; its upper-cased form must be 'VALID' or 'SAME'.
    """
    int_args = (('kernel_size', kernel_size), ('stride', stride))

    # Arguments are validated before the parent constructor runs.
    for arg_name, arg_value in int_args:
        validator.check_value_type(arg_name, arg_value, [int], self.cls_name)
    self.pad_mode = validator.check_string(pad_mode.upper(), ['VALID', 'SAME'],
                                           'pad_mode', self.cls_name)
    for arg_name, arg_value in int_args:
        validator.check_int(arg_value, 1, Rel.GE, arg_name, self.cls_name)

    super(AvgPool1d, self).__init__(kernel_size, stride, pad_mode)

    # The scalar sizes are stored as pairs with a leading 1.
    self.kernel_size = (1, kernel_size)
    self.stride = (1, stride)
    self.avg_pool = P.AvgPool(kernel_size=self.kernel_size,
                              strides=self.stride,
                              pad_mode=self.pad_mode)

    self.shape = F.shape
    self.reduce_mean = P.ReduceMean(keep_dims=True)
    self.slice = P.Slice()
    self.expand = P.ExpandDims()
    self.squeeze = P.Squeeze(2)
def __init__(self, axis=None):
    """Validate ``axis`` and create the operators.

    Args:
        axis: ``None`` (replaced by an empty tuple), an int, or a tuple whose
            items are each checked to be ints.
    """
    super(ClipByNorm, self).__init__()

    axis = () if axis is None else axis
    if isinstance(axis, tuple):
        # Check every tuple element individually for a precise error name.
        for position, element in enumerate(axis):
            Validator.check_value_type("axis[%d]" % position, element, [int], self.cls_name)
    self.axis = Validator.check_value_type('axis', axis, [int, tuple], self.cls_name)

    # Operators.
    self.reduce_sum = P.ReduceSum(keep_dims=True)
    self.select_ = P.Select()
    self.greater_ = P.Greater()
    self.cast = P.Cast()
    self.sqrt = P.Sqrt()
    self.max_op = P.Maximum()
    self.shape = P.Shape()
    self.reshape = P.Reshape()
    self.fill = P.Fill()
    self.expand_dims = P.ExpandDims()
    self.dtype = P.DType()
def __init__(self, num_classes, feature_shape, backbone, channel, depth, infer_scale_sizes,
             atrous_rates, decoder_output_stride, output_stride, fine_tune_batch_norm,
             image_pyramid):
    """Build the multi-scale DeepLabV3 wrapper around ``SingleDeepLabV3``.

    Args:
        num_classes: Forwarded to ``SingleDeepLabV3``.
        feature_shape: Forwarded to ``SampleBlock`` and ``SingleDeepLabV3``;
            elements 2 and 3 give the target size of the bilinear resize.
        backbone: Accepted for interface compatibility but not used here; the
            backbone is always built with ``resnet50_dl(fine_tune_batch_norm)``.
        channel: Forwarded to ``SingleDeepLabV3``.
        depth: Forwarded to ``SingleDeepLabV3``.
        infer_scale_sizes: Extra scales appended after the image pyramid.
            ``None`` is treated as an empty list.
        atrous_rates: Forwarded to ``SingleDeepLabV3``.
        decoder_output_stride: Forwarded to ``SingleDeepLabV3``.
        output_stride: Forwarded to ``SingleDeepLabV3``.
        fine_tune_batch_norm: Forwarded to ``resnet50_dl`` and ``SingleDeepLabV3``.
        image_pyramid: Scales of the image pyramid. ``None`` means ``[1.0]``.
    """
    super(DeepLabV3, self).__init__()

    # Normalise the optional scale lists up front. Previously a None
    # ``infer_scale_sizes`` overwrote the empty-list fallback and was then
    # iterated, raising TypeError.
    if infer_scale_sizes is None:
        infer_scale_sizes = []
    self.infer_scale_sizes = infer_scale_sizes
    if image_pyramid is None:
        image_pyramid = [1.0]
    self.image_pyramid = image_pyramid

    # Pyramid scales first, then the inference scales.
    scale_sizes = list(image_pyramid) + list(infer_scale_sizes)

    # One SampleBlock per scale.
    self.samples = nn.CellList(
        [SampleBlock(feature_shape, scale_size) for scale_size in scale_sizes])

    self.deeplabv3 = SingleDeepLabV3(
        num_classes=num_classes,
        feature_shape=feature_shape,
        backbone=resnet50_dl(fine_tune_batch_norm),
        channel=channel,
        depth=depth,
        scale_sizes=scale_sizes,
        atrous_rates=atrous_rates,
        decoder_output_stride=decoder_output_stride,
        output_stride=output_stride,
        fine_tune_batch_norm=fine_tune_batch_norm)

    self.softmax = P.Softmax(axis=1)
    self.concat = P.Concat(axis=2)
    self.expand_dims = P.ExpandDims()
    self.reduce_mean = P.ReduceMean()
    self.argmax = P.Argmax(axis=1)
    self.sample_common = P.ResizeBilinear(
        (int(feature_shape[2]), int(feature_shape[3])), align_corners=True)
def __init__(self, network, hparams):
    """Wrap ``network`` and pick the loss criterion from ``hparams``.

    Args:
        network: The wrapped network, stored as ``self.network``.
        hparams: Hyper-parameter object; ``input_type`` and
            ``output_distribution`` select the criterion.

    Raises:
        RuntimeError: If the input is not mu-law quantized and
            ``hparams.output_distribution`` is neither "Logistic" nor "Normal".
    """
    super(NetWithLossClass, self).__init__(auto_prefix=False)
    self.network = network
    self.hparams = hparams

    self.ReduceMean_false = P.ReduceMean(keep_dims=False)
    self.expand_op = P.ExpandDims()
    self.transpose_op = P.Transpose()
    self.reshape_op = P.Reshape()

    self.is_mulaw_quant = is_mulaw_quantize(hparams.input_type)
    if self.is_mulaw_quant:
        self.criterion = MaskedCrossEntropyLoss()
    elif hparams.output_distribution == "Logistic":
        self.criterion = DiscretizedMixturelogisticLoss(hparams)
    elif hparams.output_distribution == "Normal":
        self.criterion = MixtureGaussianLoss(hparams)
    else:
        self.criterion = None
        raise RuntimeError(
            "Not supported output distribution type: {}".format(
                hparams.output_distribution))
def __init__(self, probs=None, logits=None, seed=0, dtype=mstype.int32, name="Categorical"):
    """Set up a categorical distribution from either ``probs`` or ``logits``.

    Args:
        probs: Probabilities; normalised here by their sum over the last axis.
        logits: Logits; normalised here by subtracting the log of the sum of
            their exponentials over the last axis.
        seed: Passed to the parent constructor and to ``P.Multinomial``.
        dtype: Checked against ``mstype.int_type``.
        name: Passed to the parent constructor.

    ``raise_probs_logits_error()`` is called when both or neither of ``probs``
    and ``logits`` are given.
    """
    # Must remain the first statement: it snapshots the constructor arguments
    # before any other local name is created.
    param = dict(locals())
    check_type(dtype, mstype.int_type, "Categorical")
    super(Categorical, self).__init__(seed, dtype, name, param)
    if (probs is None) == (logits is None):
        raise_probs_logits_error()

    # Operators.
    self.reduce_sum = P.ReduceSum(keep_dims=True)
    self.log = P.Log()
    self.exp = P.Exp()
    self.shape = P.Shape()
    self.reshape = P.Reshape()
    self.div = P.RealDiv()
    self.size = P.Size()
    self.mutinomial = P.Multinomial(seed=seed)
    self.cast = P.Cast()
    self.expandim = P.ExpandDims()
    self.gather = P.GatherNd()
    self.concat = P.Concat(-1)

    if probs is None:
        # Logits given: normalise in log space, then derive the probabilities.
        self._logits = cast_to_tensor(logits, mstype.float32)
        normalizer = self.reduce_sum(self.exp(self._logits), -1)
        self._logits = self._logits - self.log(normalizer)
        self._probs = logits_to_probs(self._logits)
        self._param = self._logits
    else:
        # Probabilities given: normalise by their sum, then derive the logits.
        self._probs = cast_to_tensor(probs, mstype.float32)
        normalizer = self.reduce_sum(self._probs, -1)
        self._probs = self.div(self._probs, normalizer)
        self._logits = probs_to_logits(self._probs)
        self._param = self._probs

    # The last axis holds the events; everything before it is flattened.
    self._num_events = self.shape(self._param)[-1]
    self._param2d = self.reshape(self._param, (-1, self._num_events))
    self._batch_shape = self.shape(self._param2d)[0]
def __init__(self):
    """Create the operators used by the descriptor computation.

    The five ``BroadcastTo`` operators target fixed shapes built around the
    constants 192 and 138.
    """
    super(ComputeDescriptor, self).__init__()

    # Shape manipulation.
    self.reshape = P.Reshape()
    self.transpose = P.Transpose()
    self.cast = P.Cast()
    self.rsum = P.ReduceSum()

    # Broadcasts to fixed target shapes.
    self.broadcastto = P.BroadcastTo((1, 192 * 138))
    self.broadcastto1 = P.BroadcastTo((1, 192, 138, 3))
    self.broadcastto2 = P.BroadcastTo((1, 192, 138, 3, 3))
    self.broadcastto3 = P.BroadcastTo((1, 192, 138, 4))
    self.broadcastto4 = P.BroadcastTo((1, 192, 138, 4, 3))

    self.expdims = P.ExpandDims()
    self.concat = P.Concat(axis=3)
    self.gather = P.GatherV2()
    self.slice = P.Slice()

    # Element-wise math.
    self.mul = P.Mul()
    self.square = P.Square()
    self.inv = P.Inv()
    self.sqrt = P.Sqrt()
    self.ones = P.OnesLike()
    self.eye = P.Eye()
def __init__(self, vocab_len, word_len, num_classes, vec_length):
    """Build the embedding, three convolution branches and the classifier.

    Args:
        vocab_len: Vocabulary size of the embedding.
        word_len: Stored as ``self.word_len``.
        num_classes: Output width of the final dense layer.
        vec_length: Embedding width.
    """
    super(TextCNN, self).__init__()
    # Stored before make_layer() is called on self below.
    self.vec_length = vec_length
    self.word_len = word_len
    self.num_classes = num_classes

    self.unsqueeze = P.ExpandDims()
    self.embedding = nn.Embedding(vocab_len, self.vec_length, embedding_table='normal')
    self.slice = P.Slice()

    # Three branches with kernel heights 3, 4 and 5.
    self.layer1 = self.make_layer(kernel_height=3)
    self.layer2 = self.make_layer(kernel_height=4)
    self.layer3 = self.make_layer(kernel_height=5)
    self.concat = P.Concat(1)

    # Dense input width is 96 * 3.
    self.fc = nn.Dense(96 * 3, self.num_classes)
    self.drop = nn.Dropout(keep_prob=0.5)
    self.print = P.Print()
    # NOTE: despite the attribute name, this operator is ReduceMax.
    self.reducemean = P.ReduceMax(keep_dims=False)
def __init__(self, input_size, batch_size=64, hidden_size=512, num_layer=2):
    """Build a stacked LSTM followed by a dense classifier.

    Args:
        input_size: Feature width of the LSTM input.
        batch_size: Batch size used for the initial hidden and cell states.
        hidden_size: LSTM hidden width and dense input width.
        num_layer: Number of stacked LSTM layers. Earlier the LSTM and its
            packed weight were hard-coded for two layers while the initial
            states already used ``num_layer``; all three now agree. The
            default of 2 gives exactly the previous sizes.
    """
    super(StackedRNNForGPU, self).__init__()
    self.batch_size = batch_size
    self.input_size = input_size
    self.num_classes = 11
    self.reshape = P.Reshape()
    self.cast = P.Cast()

    k = (1 / hidden_size)**0.5
    # Length of the packed LSTM weight.
    # NOTE(review): assumes a unidirectional nn.LSTM with bias that packs, per
    # layer, gate_size * (layer_input + hidden_size) weights plus 2 * gate_size
    # biases -- confirm against the installed MindSpore version.
    # For num_layer == 2 this equals 4 * hidden_size * (input_size + 3 * hidden_size + 4).
    gate_size = 4 * hidden_size
    weight_shape = gate_size * (input_size + hidden_size + 2)
    weight_shape += (num_layer - 1) * gate_size * (2 * hidden_size + 2)
    self.weight = Parameter(np.random.uniform(
        -k, k, (weight_shape, 1, 1)).astype(np.float32), name='weight')

    # Zero initial hidden and cell states.
    self.h = Tensor(
        np.zeros(shape=(num_layer, batch_size, hidden_size)).astype(np.float32))
    self.c = Tensor(
        np.zeros(shape=(num_layer, batch_size, hidden_size)).astype(np.float32))

    self.lstm = nn.LSTM(input_size, hidden_size, num_layers=num_layer)
    # Replace the LSTM's own weight with the parameter created above.
    self.lstm.weight = self.weight

    self.fc_weight = np.random.random(
        (self.num_classes, hidden_size)).astype(np.float32)
    self.fc_bias = np.random.random(self.num_classes).astype(np.float32)
    self.fc = nn.Dense(in_channels=hidden_size,
                       out_channels=self.num_classes,
                       weight_init=Tensor(self.fc_weight),
                       bias_init=Tensor(self.fc_bias))
    self.fc.to_float(mstype.float32)

    self.expand_dims = P.ExpandDims()
    self.concat = P.Concat()
    self.transpose = P.Transpose()
def __init__(self, in_channels, out_channels, kernel_size, stride=1, pad_mode='same',
             padding=0, dilation=1, group=1, has_bias=True):
    """Initialise the parent convolution and add quantization bounds and scales.

    All arguments are forwarded positionally to the parent constructor;
    ``out_channels`` additionally sizes the ``scale_w`` parameter.
    """
    super(QuanConv, self).__init__(
        in_channels, out_channels, kernel_size, stride, pad_mode,
        padding, dilation, group, has_bias)

    self.floor = P.Floor()
    self.expand_dims = P.ExpandDims()

    # Activation bounds: 0 .. 255.
    self.x_lower_bound = Tensor(0, ms.float32)
    self.x_upper_bound = Tensor(2**8 - 1, ms.float32)
    # Weight bounds evaluate to -129 .. 128.
    # NOTE(review): -2**7 - 1 is -129 by operator precedence; confirm this
    # range is intended.
    self.w_lower_bound = Tensor(-2**7 - 1, ms.float32)
    self.w_upper_bound = Tensor(2**7, ms.float32)

    # Learnable scales and zero point, all initialised to ones.
    self.scale_a = Parameter(initializer('ones', [1]))
    self.scale_w = Parameter(initializer('ones', [out_channels]))
    self.zp_a = Parameter(initializer('ones', [1]))
def __init__(self, vocab_size, embedding_size, use_one_hot=False, embedding_table='normal',
             dtype=mstype.float32, padding_idx=None):
    """Validate the arguments and create the embedding table parameter.

    Args:
        vocab_size: Int, number of rows of the embedding table.
        embedding_size: Int, number of columns of the embedding table.
        use_one_hot: Bool, stored as ``self.use_one_hot``.
        embedding_table: Initializer specification passed to ``initializer``.
        dtype: Must be a subclass of ``mstype.number_type``.
        padding_idx: Optional row index whose initial values are set to zero.
            Must satisfy ``0 <= padding_idx < vocab_size``.
    """
    super(Embedding, self).__init__()
    self.vocab_size = validator.check_value_type('vocab_size', vocab_size, [int], self.cls_name)
    self.embedding_size = validator.check_value_type(
        'embedding_size', embedding_size, [int], self.cls_name)
    validator.check_value_type('use_one_hot', use_one_hot, [bool], self.cls_name)
    validator.check_subclass("dtype", dtype, mstype.number_type, self.cls_name)
    self.use_one_hot = use_one_hot
    self.dtype = dtype

    self.init_tensor = initializer(embedding_table, [vocab_size, embedding_size])
    self.padding_idx = padding_idx
    if padding_idx is not None:
        # The upper bound is exclusive: the table has rows 0 .. vocab_size - 1.
        # The former inclusive check let padding_idx == vocab_size through to
        # the row assignment below, which indexes past the last row.
        self.padding_idx = validator.check_int_range(
            padding_idx, 0, vocab_size, Rel.INC_LEFT, "padding_idx", self.cls_name)
        self.init_tensor = self.init_tensor.to_tensor().asnumpy()
        self.init_tensor[self.padding_idx] = 0
    self.embedding_table = Parameter(self.init_tensor, name='embedding_table')

    self.expand = P.ExpandDims()
    self.reshape_flat = P.Reshape()
    self.shp_flat = (-1, )
    self.gather = P.GatherV2()
    self.one_hot = P.OneHot()
    self.on_value = Tensor(1.0, self.dtype)
    self.off_value = Tensor(0.0, self.dtype)
    self.array_mul = P.MatMul()
    self.reshape = P.Reshape()
    self.get_shp = P.Shape()
def __init__(self, params, learning_rate, momentum, matrix_A, matrix_G, weight_decay=0.0,
             loss_scale=1.0, num_hidden_layers=24, batch_size=12, damping=0.03,
             decay_filter=lambda x: 'layernorm' not in x.name.lower() and 'bias' not in x.name.lower()):
    """Set up the THOR optimizer state.

    Args:
        params: Parameters to optimize; forwarded to the parent constructor.
        learning_rate: Forwarded to the parent constructor.
        momentum: Momentum value; a negative float is rejected.
        matrix_A: Iterable wrapped into a ``ParameterTuple``.
        matrix_G: Iterable wrapped into a ``ParameterTuple``.
        weight_decay: Forwarded to the parent; stored multiplied by ``loss_scale``.
        loss_scale: Forwarded to the parent constructor.
        num_hidden_layers: Stored as ``self.num_hidden_layers``.
        batch_size: Stored as ``self.batch_size``.
        damping: Stored as ``self.damping``.
        decay_filter: Predicate evaluated once per parameter to build
            ``self.decay_flags``.

    Raises:
        ValueError: If ``momentum`` is a float below 0.0.
    """
    super(THOR, self).__init__(learning_rate, params, weight_decay, loss_scale)
    if isinstance(momentum, float) and momentum < 0.0:
        raise ValueError("momentum should be at least 0.0, but got momentum {}".format(momentum))

    # Parameters and optimizer state.
    self.momentum = Parameter(Tensor(momentum, mstype.float32))
    self.params = self.parameters
    self.moments = self.params.clone(prefix="moments", init='zeros')
    self.hyper_map = C.HyperMap()
    self.opt = P.ApplyMomentum()
    self.matrix_A = ParameterTuple(matrix_A)
    self.matrix_G = ParameterTuple(matrix_G)

    # Operators.
    self.matmul = P.MatMul()
    self.transpose = P.Transpose()
    self.shape = P.Shape()
    self.reshape = P.Reshape()
    self.mul = P.Mul()
    self.gather = P.Gather()
    self.sqrt = P.Sqrt()
    self.assign = P.Assign()
    self.cast = P.Cast()
    self.expand = P.ExpandDims()
    self.square = P.Square()
    self.inv = P.Inv()

    # Plain configuration values.
    self.matrix_A_inv = ()
    self.matrix_G_inv = ()
    self.num_hidden_layers = num_hidden_layers
    self.thor = True
    self.weight_decay = weight_decay * loss_scale
    self.decay_flags = tuple(decay_filter(x) for x in self.parameters)
    self.batch_size = batch_size
    self.damping = damping
    self.one = Tensor(1, mstype.int32)
    self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)

    # Distributed gradient reduction.
    mean = _get_gradients_mean()
    degree = _get_device_num()
    self.grad_reducer_g = DistributedGradReducerThor(self.parameters, 3, mean, degree)
def __init__(self, kernel_size=1, stride=1, pad_mode="valid"):
    """Validate the pooling arguments and build the 1-D max pooling operators.

    Args:
        kernel_size: Int, checked to be >= 1.
        stride: Int, checked to be >= 1.
        pad_mode: String; its upper-cased form must be 'VALID' or 'SAME'.
    """
    super(MaxPool1d, self).__init__(kernel_size, stride, pad_mode)

    int_args = (('kernel_size', kernel_size), ('stride', stride))
    for arg_name, arg_value in int_args:
        validator.check_value_type(arg_name, arg_value, [int], self.cls_name)
    self.pad_mode = validator.check_string(pad_mode.upper(), ['VALID', 'SAME'],
                                           'pad_mode', self.cls_name)
    for arg_name, arg_value in int_args:
        validator.check_int(arg_value, 1, Rel.GE, arg_name, self.cls_name)

    # The scalar sizes are stored as pairs with a leading 1.
    self.kernel_size = (1, kernel_size)
    self.stride = (1, stride)
    self.max_pool = P.MaxPool(ksize=self.kernel_size,
                              strides=self.stride,
                              padding=self.pad_mode)
    self.max_pool_with_arg_max = P.MaxPoolWithArgmax(
        ksize=self.kernel_size, strides=self.stride, padding=self.pad_mode)

    self.shape = F.shape
    self.reduce_mean = P.ReduceMean(keep_dims=True)
    self.expand = P.ExpandDims()
    self.squeeze = P.Squeeze(2)
    # True when the configured device target is Ascend.
    self.is_tbe = context.get_context("device_target") == "Ascend"
def __init__(self, config, batch_size, num_classes, target_means=(0., 0., 0., 0.),
             target_stds=(0.1, 0.1, 0.2, 0.2)):
    """Build the mask head, its loss operators and constant tensors.

    Args:
        config: Configuration object; ``rcnn_loss_mask_fb_weight``,
            ``rcnn_mask_out_channels``, ``rcnn_in_channels`` and
            ``num_expected_pos_stage2`` are read from it.
        batch_size: Multiplied with ``num_expected_pos_stage2`` to get the box count.
        num_classes: Number of classes.
        target_means: Stored as ``self.target_means``.
        target_stds: Stored as ``self.target_stds``.
    """
    super(RcnnMask, self).__init__()
    cfg = config

    loss_weight = np.array(cfg.rcnn_loss_mask_fb_weight).astype(np.float16)
    self.rcnn_loss_mask_fb_weight = Tensor(loss_weight)
    self.rcnn_mask_out_channels = cfg.rcnn_mask_out_channels
    self.target_means = target_means
    self.target_stds = target_stds
    self.num_classes = num_classes
    self.in_channels = cfg.rcnn_in_channels

    self.fpn_mask = FpnMask(self.in_channels, self.rcnn_mask_out_channels, self.num_classes)

    # Operators.
    self.logicaland = P.LogicalAnd()
    self.loss_mask = P.SigmoidCrossEntropyWithLogits()
    self.onehot = P.OneHot()
    self.greater = P.Greater()
    self.cast = P.Cast()
    self.sum_loss = P.ReduceSum()
    self.tile = P.Tile()
    self.expandims = P.ExpandDims()
    self.mean_loss = P.ReduceMean()

    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.0, mstype.float32)

    # Mask of ones whose first column (class index 0) is zero.
    self.num_bboxes = cfg.num_expected_pos_stage2 * batch_size
    first_class_removed = np.ones((self.num_bboxes, self.num_classes))
    first_class_removed[:, 0] = 0
    self.rmv_first_tensor = Tensor(first_class_removed.astype(np.float16))
def __init__(self, neg_item_num, l2_embed, dist_reg):
    """Store the loss hyper-parameters and create the operators.

    Args:
        neg_item_num: Stored as ``self.neg_item_num``.
        l2_embed: Stored as ``self.l2_embed``.
        dist_reg: Stored as ``self.dist_reg``.
    """
    super(BGCFLoss, self).__init__()

    self.neg_item_num = neg_item_num
    self.l2_embed = l2_embed
    self.dist_reg = dist_reg

    # Element-wise math.
    self.log = P.Log()
    self.pow = P.Pow()
    self.multiply = P.Mul()
    self.sigmoid = P.Sigmoid()
    self.l2_loss = P.L2Loss()

    # Shape and layout manipulation.
    self.cast = P.Cast()
    self.tile = P.Tile()
    self.shape = P.Shape()
    self.reshape = P.Reshape()
    self.concat = P.Concat(1)
    self.concat2 = P.Concat(2)
    self.split = P.Split(0, 2)
    self.expand_dims = P.ExpandDims()
    self.squeeze = P.Squeeze(1)
    self.transpose = P.Transpose()

    # Reductions and matrix products.
    self.reduce_sum = P.ReduceSum()
    self.matmul = P.BatchMatMul()
class ReshapeNet(nn.Cell):
    """Cell that reshapes its input to a shape fixed at construction time."""

    def __init__(self, shape):
        """Store the target ``shape`` and create the Reshape operator."""
        super(ReshapeNet, self).__init__()
        self.shape = shape
        self.op = P.Reshape()

    def construct(self, x):
        """Return ``x`` reshaped to the stored shape."""
        return self.op(x, self.shape)


# Cases that are expected to raise; each entry pairs a case name with the
# block under test, the expected exception and the inputs.
# NOTE: the list continues beyond this excerpt.
raise_set = [
    # input is scalar, not Tensor
    ('ExpandDims0', {
        'block': (P.ExpandDims(), {
            'exception': TypeError,
            'error_keywords': ['ExpandDims']
        }),
        'desc_inputs': [5.0, 1],
        'skip': ['backward']
    }),
    # axis is as a parameter
    ('ExpandDims1', {
        'block': (P.ExpandDims(), {
            'exception': TypeError,
            'error_keywords': ['ExpandDims']
        }),
        'desc_inputs': [Tensor(np.ones([3, 4]).astype(np.float32)), 1],
        'skip': ['backward']
    }),
def __init__(self, axis):
    """Create the ExpandDims operator and remember ``axis``.

    Args:
        axis: Stored as ``self.axis``.
    """
    super(ExpandDimsNet, self).__init__()
    self.op = P.ExpandDims()
    self.axis = axis
def __init__(self, dim=0):
    """Create the operators for stacking along ``dim``.

    Args:
        dim: Axis given to ``P.Concat``; also stored as ``self.dim``.
    """
    super(Stack, self).__init__()
    self.dim = dim
    self.concat = P.Concat(axis=dim)
    self.expand_dim = P.ExpandDims()
def __init__(self, batch_size, seq_length, vocab_size, decoder, beam_width=4,
             length_penalty_weight=1.0, max_decode_length=128, sos_id=1, eos_id=2,
             compute_type=mstype.float32):
    """Create the operators, constant tensors and initial state for beam search.

    Args:
        batch_size: Batch size; fixes the shape of all constant tensors.
        seq_length: Stored as ``self.seq_length``.
        vocab_size: Vocabulary size; used for the flattened score shape.
        decoder: Decoder cell, stored as ``self.decoder``.
        beam_width: Number of beams per batch element.
        length_penalty_weight: Weight passed to ``LengthPenalty``.
        max_decode_length: Stored as ``self.max_decode_length``.
        sos_id: Token id filling the start ids and the initial sequence.
        eos_id: Token id filling ``self.eos_ids``.
        compute_type: Accepted but not used in this constructor.
    """
    super(BeamSearchDecoder, self).__init__(auto_prefix=False)
    self.seq_length = seq_length
    self.batch_size = batch_size
    self.vocab_size = vocab_size
    self.beam_width = beam_width
    self.length_penalty_weight = length_penalty_weight
    self.max_decode_length = max_decode_length
    self.decoder = decoder

    self.add = P.TensorAdd()
    self.expand = P.ExpandDims()
    self.reshape = P.Reshape()
    self.shape_flat = (-1, )
    self.shape = P.Shape()

    self.zero_tensor = Tensor(np.zeros([batch_size, beam_width]), mstype.float32)
    self.ninf_tensor = Tensor(np.full([batch_size, beam_width], -INF), mstype.float32)

    self.select = P.Select()
    self.flat_shape = (batch_size, beam_width * vocab_size)
    self.topk = P.TopK(sorted=True)
    self.floor_div = P.FloorDiv()
    self.vocab_size_tensor = Tensor(self.vocab_size, mstype.int32)
    self.real_div = P.RealDiv()
    self.mod = Mod()
    self.equal = P.Equal()
    self.eos_ids = Tensor(np.full([batch_size, beam_width], eos_id), mstype.int32)

    # beam_ids[b, k] == k
    beam_ids = np.tile(
        np.arange(beam_width).reshape((1, beam_width)), [batch_size, 1])
    self.beam_ids = Tensor(beam_ids, mstype.int32)
    # batch_ids[b, k] == b
    batch_ids = np.arange(batch_size * beam_width).reshape(
        (batch_size, beam_width)) // beam_width
    self.batch_ids = Tensor(batch_ids, mstype.int32)

    self.concat = P.Concat(axis=-1)
    self.gather_nd = P.GatherNd()
    self.greater_equal = P.GreaterEqual()
    self.sub = P.Sub()
    self.cast = P.Cast()
    self.zeroslike = P.ZerosLike()

    # init inputs and states
    self.start_ids = Tensor(np.full([batch_size * beam_width, 1], sos_id), mstype.int32)
    self.init_seq = Tensor(np.full([batch_size, beam_width, 1], sos_id), mstype.int32)
    # Only the first beam starts with score 0; the others start at -INF.
    init_scores = np.tile(np.array([[0.] + [-INF] * (beam_width - 1)]), [batch_size, 1])
    self.init_scores = Tensor(init_scores, mstype.float32)
    # np.bool_ replaces the deprecated np.bool alias, which newer NumPy
    # releases no longer provide.
    self.init_finished = Tensor(
        np.zeros([batch_size, beam_width], dtype=np.bool_))
    self.init_length = Tensor(
        np.zeros([batch_size, beam_width], dtype=np.int32))
    self.length_penalty = LengthPenalty(weight=length_penalty_weight)
    self.one = Tensor(1, mstype.int32)
def __init__(self, in_channels, out_channels, kernel_size, stride=1, pad_mode='same',
             padding=0, dilation=1, group=1, has_bias=False, weight_init='normal',
             bias_init='zeros'):
    """Validate the arguments and build a 1-D convolution on top of ``P.Conv2D``.

    Args:
        in_channels: Forwarded to the parent constructor.
        out_channels: Forwarded to the parent constructor.
        kernel_size: Int >= 1; turned into ``(1, kernel_size)``.
        stride: Int >= 1; turned into ``(1, stride)``.
        pad_mode: One of 'valid', 'same', 'pad'.
        padding: Int >= 0; stored as ``(0, 0, padding, padding)``.
        dilation: Int >= 1; turned into ``(1, dilation)``.
        group: Forwarded to the parent constructor.
        has_bias: Forwarded to the parent constructor.
        weight_init: Initializer, or a rank-3 ``Tensor`` that receives an extra
            axis at position 2.
        bias_init: Forwarded to the parent constructor.

    Raises:
        ValueError: If ``pad_mode`` is not one of the accepted values.
    """
    # (name, value, lower bound) for every integer argument.
    int_args = (
        ("kernel_size", kernel_size, 1),
        ("stride", stride, 1),
        ("padding", padding, 0),
        ("dilation", dilation, 1),
    )
    for arg_name, arg_value, _ in int_args:
        Validator.check_value_type(arg_name, arg_value, [int], self.cls_name)
    for arg_name, arg_value, lower_bound in int_args:
        Validator.check_integer(arg_name, arg_value, lower_bound, Rel.GE, self.cls_name)

    # Lift the scalar settings to pairs with a leading 1.
    kernel_size = (1, kernel_size)
    stride = (1, stride)
    dilation = (1, dilation)

    get_shape = P.Shape()
    get_dtype = P.DType()
    if isinstance(weight_init, Tensor):
        # A tensor initializer must be rank 3; insert an axis at position 2.
        weight_init_shape = get_shape(weight_init)
        Validator.check_integer('weight_init_shape', len(weight_init_shape), 3, Rel.EQ,
                                self.cls_name)
        weight_init_dtype = get_dtype(weight_init)
        expanded_value = np.expand_dims(weight_init.asnumpy(), 2)
        weight_init = Tensor(expanded_value, weight_init_dtype)

    super(Conv1d, self).__init__(
        in_channels, out_channels, kernel_size, stride, pad_mode, padding,
        dilation, group, has_bias, weight_init, bias_init)

    self.padding = (0, 0, padding, padding)
    self.conv2d = P.Conv2D(out_channel=self.out_channels,
                           kernel_size=self.kernel_size,
                           mode=1,
                           pad_mode=self.pad_mode,
                           pad=self.padding,
                           stride=self.stride,
                           dilation=self.dilation,
                           group=self.group)
    self.bias_add = P.BiasAdd()
    if pad_mode not in ('valid', 'same', 'pad'):
        raise ValueError('Attr \'pad_mode\' of \'Conv1d\' Op passed '
                         + str(pad_mode) + ', should be one of values in \'valid\', \'same\', \'pad\'.')
    self.expand_dims = P.ExpandDims()
    self.squeeze = P.Squeeze(2)
    self.shape = P.Shape()
def test_expand_dims():
    """ExpandDims with axis 0 turns a (2, 2) tensor into shape (1, 2, 2)."""
    data = np.array([[2, 2], [2, 2]])
    op = P.ExpandDims()
    result = op(Tensor(data), 0)
    assert result.asnumpy().shape == (1, 2, 2)
def __init__(self, batch_size, from_tensor_width, to_tensor_width, from_seq_length,
             to_seq_length, num_attention_heads=1, size_per_head=512, query_act=None,
             key_act=None, value_act=None, has_attention_mask=False,
             attention_probs_dropout_prob=0.0, use_one_hot_embeddings=False,
             initializer_range=0.02, do_return_2d_tensor=False,
             use_relative_positions=False, compute_type=mstype.float32):
    """Build the projections, operators and constant shapes of the attention layer.

    Args:
        batch_size: Batch size used in the constant reshape targets.
        from_tensor_width: Input width of the query projection.
        to_tensor_width: Input width of the key and value projections.
        from_seq_length: Sequence length on the query side.
        to_seq_length: Sequence length on the key/value side.
        num_attention_heads: Number of heads.
        size_per_head: Width per head.
        query_act: Activation of the query projection.
        key_act: Activation of the key projection.
        value_act: Activation of the value projection.
        has_attention_mask: When true, the mask-related operators are created.
        attention_probs_dropout_prob: ``nn.Dropout`` receives one minus this value.
        use_one_hot_embeddings: Forwarded to ``RelaPosEmbeddingsGenerator``.
        initializer_range: Argument of ``TruncatedNormal``; also forwarded to
            ``RelaPosEmbeddingsGenerator``.
        do_return_2d_tensor: Chooses between a 2-D and a 3-D ``shape_return``.
        use_relative_positions: When true, the relative position generator is created.
        compute_type: Compute type of the projections and of the saturate cast.
    """
    super(BertAttention, self).__init__()

    self.batch_size = batch_size
    self.from_seq_length = from_seq_length
    self.to_seq_length = to_seq_length
    self.num_attention_heads = num_attention_heads
    self.size_per_head = size_per_head
    self.has_attention_mask = has_attention_mask
    self.use_relative_positions = use_relative_positions

    # Score scaling factor: 1 / sqrt(size_per_head).
    self.scores_mul = 1.0 / math.sqrt(float(self.size_per_head))
    self.reshape = P.Reshape()
    self.shape_from_2d = (-1, from_tensor_width)
    self.shape_to_2d = (-1, to_tensor_width)

    # Query / key / value projections share one weight initializer object.
    weight = TruncatedNormal(initializer_range)
    units = num_attention_heads * size_per_head
    query_dense = nn.Dense(from_tensor_width, units, activation=query_act, weight_init=weight)
    self.query_layer = query_dense.to_float(compute_type)
    key_dense = nn.Dense(to_tensor_width, units, activation=key_act, weight_init=weight)
    self.key_layer = key_dense.to_float(compute_type)
    value_dense = nn.Dense(to_tensor_width, units, activation=value_act, weight_init=weight)
    self.value_layer = value_dense.to_float(compute_type)

    self.shape_from = (batch_size, from_seq_length, num_attention_heads, size_per_head)
    self.shape_to = (batch_size, to_seq_length, num_attention_heads, size_per_head)

    self.matmul_trans_b = P.BatchMatMul(transpose_b=True)
    self.multiply = P.Mul()
    self.transpose = P.Transpose()
    self.trans_shape = (0, 2, 1, 3)
    self.trans_shape_relative = (2, 0, 1, 3)
    self.trans_shape_position = (1, 2, 0, 3)
    self.multiply_data = -10000.0
    self.batch_num = batch_size * num_attention_heads
    self.matmul = P.BatchMatMul()

    self.softmax = nn.Softmax()
    self.dropout = nn.Dropout(1 - attention_probs_dropout_prob)

    if self.has_attention_mask:
        # Operators only needed when an attention mask is applied.
        self.expand_dims = P.ExpandDims()
        self.sub = P.Sub()
        self.add = P.TensorAdd()
        self.cast = P.Cast()
        self.get_dtype = P.DType()

    if do_return_2d_tensor:
        self.shape_return = (batch_size * from_seq_length, units)
    else:
        self.shape_return = (batch_size, from_seq_length, units)

    self.cast_compute_type = SaturateCast(dst_type=compute_type)
    if self.use_relative_positions:
        self._generate_relative_positions_embeddings = RelaPosEmbeddingsGenerator(
            length=to_seq_length,
            depth=size_per_head,
            max_relative_position=16,
            initializer_range=initializer_range,
            use_one_hot_embeddings=use_one_hot_embeddings)
def __init__(self, config, use_one_hot_embeddings=False):
    """Assemble the inference model: embeddings, encoder and beam-search decoder.

    Args:
        config: Model configuration. It is deep-copied and both dropout
            probabilities on the copy are set to 0.0 before use.
        use_one_hot_embeddings: Forwarded to ``EmbeddingLookup``.
    """
    super(TransformerInferModel, self).__init__()

    # Work on a private copy so the caller's config is left untouched.
    config = copy.deepcopy(config)
    config.hidden_dropout_prob = 0.0
    config.attention_dropout_prob = 0.0

    self.batch_size = config.batch_size
    self.seq_length = config.seq_length
    self.hidden_size = config.hidden_size
    self.num_hidden_layers = config.num_hidden_layers
    self.embedding_size = config.hidden_size
    self.attn_embed_dim = config.hidden_size
    self.num_layers = config.num_hidden_layers
    self.last_idx = self.num_hidden_layers - 1

    self.embedding_lookup = EmbeddingLookup(
        vocab_size=config.vocab_size,
        embed_dim=self.embedding_size,
        use_one_hot_embeddings=use_one_hot_embeddings)
    self.positional_embedding = PositionalEmbedding(
        embedding_size=self.embedding_size,
        max_position_embeddings=config.max_position_embeddings)

    # use for infer
    self.projection = PredLogProbs(
        batch_size=config.batch_size * config.beam_width,
        seq_length=1,
        width=self.hidden_size,
        compute_type=config.compute_type)

    self.encoder = TransformerEncoder(
        attn_embed_dim=self.attn_embed_dim,
        encoder_layers=self.num_layers,
        num_attn_heads=config.num_attention_heads,
        intermediate_size=config.intermediate_size,
        attention_dropout_prob=config.attention_dropout_prob,
        initializer_range=config.initializer_range,
        hidden_dropout_prob=config.hidden_dropout_prob,
        hidden_act=config.hidden_act,
        compute_type=config.compute_type)

    # Single decoding step; the activation is fixed to "relu" here.
    step_cell = TransformerDecoderStep(
        config=config,
        num_hidden_layers=config.num_hidden_layers,
        attn_embed_dim=self.attn_embed_dim,
        seq_length=config.seq_length,
        num_attn_heads=config.num_attention_heads,
        intermediate_size=config.intermediate_size,
        hidden_dropout_prob=config.hidden_dropout_prob,
        compute_type=config.compute_type,
        initializer_range=config.initializer_range,
        hidden_act="relu",
        embedding_lookup=self.embedding_lookup,
        positional_embedding=self.positional_embedding,
        attn_dropout_prob=config.attention_dropout_prob,
        projection=self.projection)

    # link beam_search after decoder
    self.decoder = BeamSearchDecoder(
        batch_size=config.batch_size,
        seq_length=config.seq_length,
        vocab_size=config.vocab_size,
        decoder=step_cell,
        beam_width=config.beam_width,
        length_penalty_weight=config.length_penalty_weight,
        max_decode_length=config.max_decode_length)

    self.cast = P.Cast()
    self.dtype = config.dtype
    self.cast_compute_type = SaturateCast(dst_type=config.compute_type)
    self.expand = P.ExpandDims()
    self.multiply = P.Mul()
    self._create_attention_mask_from_input_mask = CreateAttentionMaskFromInputMask(config)

    # use for infer
    self.tile_beam = TileBeam(beam_width=config.beam_width)
    all_ones = np.ones(shape=(config.batch_size, config.max_decode_length))
    self.encode_mask = Tensor(all_ones, dtype=mstype.float32)

    # Scale factor: sqrt(embedding_size).
    self.scale = Tensor([math.sqrt(float(self.embedding_size))], dtype=mstype.float32)
    self.reshape = P.Reshape()
def __init__(self, mul_weight, strategy1=None, strategy2=None):
    """Create sharded ExpandDims and Mul operators plus the weight parameter.

    Args:
        mul_weight: Initial value of the parameter named "w1".
        strategy1: Shard strategy applied to the ExpandDims operator.
        strategy2: Shard strategy applied to the Mul operator.
    """
    super().__init__()
    expand_op = P.ExpandDims()
    self.expand_dims = expand_op.shard(strategy1)
    mul_op = P.Mul()
    self.mul = mul_op.shard(strategy2)
    self.mul_weight = Parameter(mul_weight, "w1")
def __init__(self, config, representation_size, batch_size, num_classes,
             target_means=(0., 0., 0., 0.), target_stds=(0.1, 0.1, 0.2, 0.2)):
    """Build the RCNN head: shared dense layers, score layers, losses and constants.

    Args:
        config: Configuration object read for loss weights, channel counts,
            sample counts and test settings.
        representation_size: Input width of the first shared dense layer.
        batch_size: Training batch size.
        num_classes: Number of classes.
        target_means: Stored as ``self.target_means``.
        target_stds: Stored as ``self.target_stds``.
    """
    super(Rcnn, self).__init__()
    cfg = config

    self.rcnn_loss_cls_weight = Tensor(
        np.array(cfg.rcnn_loss_cls_weight).astype(np.float16))
    self.rcnn_loss_reg_weight = Tensor(
        np.array(cfg.rcnn_loss_reg_weight).astype(np.float16))
    self.rcnn_fc_out_channels = cfg.rcnn_fc_out_channels
    self.target_means = target_means
    self.target_stds = target_stds
    self.num_classes = num_classes
    self.in_channels = cfg.rcnn_in_channels
    self.train_batch_size = batch_size
    self.test_batch_size = cfg.test_batch_size
    self.use_ambigous_sample = cfg.use_ambigous_sample

    fc_width = self.rcnn_fc_out_channels

    # Shared dense layers. Weight shapes are written (out, in) and reversed,
    # so the initializer receives (in, out).
    shape_0 = (fc_width, representation_size)
    weights_0 = initializer("XavierUniform", shape=shape_0[::-1],
                            dtype=mstype.float16).to_tensor()
    shape_1 = (fc_width, fc_width)
    weights_1 = initializer("XavierUniform", shape=shape_1[::-1],
                            dtype=mstype.float16).to_tensor()
    self.shared_fc_0 = DenseNoTranpose(representation_size, fc_width, weights_0)
    self.shared_fc_1 = DenseNoTranpose(fc_width, fc_width, weights_1)

    # Classification and box-regression score layers.
    cls_weight = initializer(
        'Normal', shape=[num_classes, fc_width][::-1],
        dtype=mstype.float16).to_tensor()
    reg_weight = initializer(
        'Normal', shape=[num_classes * 4, fc_width][::-1],
        dtype=mstype.float16).to_tensor()
    self.cls_scores = DenseNoTranpose(fc_width, num_classes, cls_weight)
    self.reg_scores = DenseNoTranpose(fc_width, num_classes * 4, reg_weight)

    # Operators.
    self.flatten = P.Flatten()
    self.relu = P.ReLU()
    self.logicaland = P.LogicalAnd()
    self.loss_cls = P.SoftmaxCrossEntropyWithLogits()
    self.loss_bbox = P.SmoothL1Loss(beta=1.0)
    self.reshape = P.Reshape()
    self.onehot = P.OneHot()
    self.greater = P.Greater()
    self.equal = P.Equal()
    self.cast = P.Cast()
    self.sum_loss = P.ReduceSum()
    self.tile = P.Tile()
    self.expandims = P.ExpandDims()
    self.gather = P.GatherNd()
    self.argmax = P.ArgMaxWithValue(axis=1)

    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.0, mstype.float32)
    self.value = Tensor(1.0, mstype.float16)

    # Boxes per training step; ambiguous samples are added when enabled.
    self.num_bboxes = (cfg.num_expected_pos_stage2 + cfg.num_expected_neg_stage2) * batch_size
    if self.use_ambigous_sample:
        self.num_bboxes = (cfg.num_expected_pos_stage2 + cfg.num_expected_amb_stage2
                           + cfg.num_expected_neg_stage2) * batch_size

    # Mask of ones whose first column (class index 0) is zero.
    first_class_removed = np.ones((self.num_bboxes, self.num_classes))
    first_class_removed[:, 0] = 0
    self.rmv_first_tensor = Tensor(first_class_removed.astype(np.float16))

    self.num_bboxes_test = cfg.rpn_max_num * cfg.test_batch_size
    range_max = np.arange(self.num_bboxes_test).astype(np.int32)
    self.range_max = Tensor(range_max)
def __init__(self, vocab_size, embedding_size, field_size, param_init='normal', target='CPU',
             slice_mode='batch_slice', feature_num_list=None, max_norm=None, sparse=True,
             operator='SUM'):
    """Create the merge operators and, under auto parallel, their shard strategies.

    Args:
        vocab_size: Forwarded to the parent constructor.
        embedding_size: Forwarded to the parent constructor.
        field_size: Positive int, stored as ``self.field_size``.
        param_init: Forwarded to the parent constructor.
        target: Forwarded to the parent constructor.
        slice_mode: Forwarded to the parent; also selects the shard strategies
            when the parallel mode is semi-auto or auto parallel.
        feature_num_list: Forwarded to the parent constructor.
        max_norm: Forwarded to the parent constructor.
        sparse: Forwarded to the parent constructor.
        operator: One of the class constants ``OPERATOR_SUM``, ``OPERATOR_MAX``
            or ``OPERATOR_MEAN``.

    Raises:
        ValueError: If ``operator`` is not supported, or if auto parallel is
            active and ``slice_mode`` is not one of the three handled modes.
    """
    super(MultiFieldEmbeddingLookup, self).__init__(vocab_size, embedding_size, param_init,
                                                    target, slice_mode, feature_num_list,
                                                    max_norm, sparse)
    self.field_size = validator.check_positive_int(field_size, 'field_size')
    self.operator = operator

    self.mul = P.Mul()
    self.inf_mask_mul = P.Mul()
    self.bias_add = P.Add()
    self.inf_add = P.Add()
    self.merge_op = None
    self.count_op = P.UnsortedSegmentSum()
    self.abs = P.Abs()
    self.equal = P.Equal()
    self.add = P.Add()
    self.cast = P.Cast()
    self.div_no_nan = P.DivNoNan()
    self.expand = P.ExpandDims()
    self.max_mask_mul = P.Mul()
    self.max_no_equal = P.NotEqual()

    # MEAN is merged with a segment sum as well.
    if operator == MultiFieldEmbeddingLookup.OPERATOR_SUM:
        self.merge_op = P.UnsortedSegmentSum()
    elif operator == MultiFieldEmbeddingLookup.OPERATOR_MAX:
        self.merge_op = P.UnsortedSegmentMax()
    elif operator == MultiFieldEmbeddingLookup.OPERATOR_MEAN:
        self.merge_op = P.UnsortedSegmentSum()
    else:
        raise ValueError(
            "The operator supports ['SUM', 'MAX', 'MEAN'], but found: " + str(operator))

    is_max = operator == MultiFieldEmbeddingLookup.OPERATOR_MAX
    parallel_mode = _get_parallel_mode()
    is_auto_parallel = parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL,
                                         ParallelMode.AUTO_PARALLEL)
    # Shard strategies are only configured under (semi-)auto parallel.
    if is_auto_parallel:
        if slice_mode in ["table_row_slice", "batch_slice"]:
            group = get_group_size()
            self.merge_op.shard(((group, 1, 1), (group, 1)))
            self.expand.shard(((group, ), ))
            self.bias_add.shard(((1, 1), (1, 1)))
            self.mul.shard(((group, 1, 1), (group, 1, 1)))
            self.count_op.shard(((group, 1), (group, 1)))
            self.add.shard(((group, ), (group, )))
            self.div_no_nan.shard(((group, 1), (group, 1)))
            self.max_mask_mul.shard(((group, 1), (group, 1)))
            self.max_no_equal.shard(((1, ), ()))
            if is_max:
                # MAX replaces the merge/count strategies set above.
                self.equal.shard(((group, 1, 1), ()))
                self.inf_mask_mul.shard(((group, 1, 1), ()))
                self.merge_op.shard(((group, 1), (group, )))
                self.count_op.shard(((group, ), (group, )))
                self.inf_add.shard(((group, 1, 1), (group, 1, 1)))
        elif slice_mode == "table_column_slice":
            group = get_group_size()
            self.merge_op.shard(((1, 1, group), (1, 1)))
            self.div_no_nan.shard(((1, group), (1, 1)))
            self.bias_add.shard(((1, 1), (1, 1)))
            self.mul.shard(((1, 1, 1), (1, 1, group)))
            self.count_op.shard(((1, 1), (1, 1)))
            self.add.shard(((1, ), (1, )))
            self.max_mask_mul.shard(((1, group), (1, 1)))
            self.expand.shard(((1, ), ))
            self.max_no_equal.shard(((1, ), ()))
            if is_max:
                # MAX replaces the merge/count strategies set above.
                self.equal.shard(((1, 1, 1), ()))
                self.inf_mask_mul.shard(((1, 1, 1), ()))
                self.merge_op.shard(((1, group), (1, )))
                self.count_op.shard(((1, ), (1, )))
                self.inf_add.shard(((1, 1, group), (1, 1, 1)))
        else:
            # Implicit concatenation keeps the message free of the stray
            # backslash and whitespace the old in-string continuation added.
            raise ValueError(
                "slice_mode should be ['table_row_slice', 'batch_slice' and "
                "'table_column_slice'], but get " + str(slice_mode))

    # Min value for fp32
    self.negative_inf_value = -3.402823466E+38
def __init__(self,
             in_channels,
             out_channels,
             kernel_size,
             stride=1,
             pad_mode='same',
             padding=0,
             dilation=1,
             group=1,
             has_bias=False,
             weight_init='normal',
             bias_init='zeros'):
    """Validate the 1-D hyper-parameters and build the layer on top of a 2-D primitive.

    ``kernel_size``, ``stride`` and ``dilation`` are each lifted to a pair with a
    leading ``1`` before being handed to the parent initializer and to
    ``P.Conv2DBackpropInput``. A ``Tensor`` given as ``weight_init`` must have three
    dimensions and gets an extra axis inserted at position 2.

    Args:
        in_channels: Stored on the instance; also passed as ``out_channel`` of
            ``P.Conv2DBackpropInput``.
        out_channels: Stored on the instance; gives the bias length when
            ``has_bias`` is true.
        kernel_size: ``int`` >= 1.
        stride: ``int`` >= 1.
        pad_mode: One of ``'valid'``, ``'same'`` or ``'pad'``.
        padding: ``int`` >= 0; stored as ``(0, 0, padding, padding)``.
        dilation: ``int`` >= 1.
        group: Forwarded to the parent initializer and the primitive.
        has_bias: When ``check_bool(has_bias)`` is true a ``bias`` parameter is created.
        weight_init: Initializer spec, or a 3-D ``Tensor``.
        bias_init: Initializer spec for the bias.

    Raises:
        ValueError: If ``pad_mode`` is not one of the three accepted values.
    """
    owner = self.cls_name

    # (argument name, value, inclusive lower bound); all type checks run before any
    # range check.
    int_args = (
        ("kernel_size", kernel_size, 1),
        ("stride", stride, 1),
        ("padding", padding, 0),
        ("dilation", dilation, 1),
    )
    for arg_name, arg_value, _ in int_args:
        Validator.check_value_type(arg_name, arg_value, [int], owner)
    for arg_name, arg_value, lower_bound in int_args:
        Validator.check_integer(arg_name, arg_value, lower_bound, Rel.GE, owner)

    # Lift the 1-D settings to their 2-D equivalents.
    kernel_2d = (1, kernel_size)
    stride_2d = (1, stride)
    dilation_2d = (1, dilation)

    shape_of = P.Shape()
    dtype_of = P.DType()
    if isinstance(weight_init, Tensor):
        init_shape = shape_of(weight_init)
        Validator.check_integer('weight_init_shape', len(init_shape), 3, Rel.EQ, owner)
        init_dtype = dtype_of(weight_init)
        # Insert an axis at position 2 so the weight becomes 4-D.
        expanded = np.expand_dims(weight_init.asnumpy(), 2)
        weight_init = Tensor(expanded, init_dtype)

    # Per the original author's note: Conv2DBackpropInput's out_channel refers to
    # Conv2D's out_channel, so Conv1dTranspose's out_channel corresponds to
    # Conv2DBackpropInput's in_channel (the two channel counts are swapped).
    super(Conv1dTranspose, self).__init__(
        in_channels,
        out_channels,
        kernel_2d,
        stride_2d,
        pad_mode,
        padding,
        dilation_2d,
        group,
        has_bias,
        weight_init,
        bias_init,
        transposed=True)

    self.padding = (0, 0, padding, padding)
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.shape = P.Shape()

    if pad_mode not in ('valid', 'same', 'pad'):
        raise ValueError("Attr 'pad_mode' of 'Conv1dTranspose' Op passed "
                         + str(pad_mode)
                         + ", should be one of values in 'valid', 'same', 'pad'.")
    self.is_valid = self.pad_mode == 'valid'
    self.is_same = self.pad_mode == 'same'
    self.is_pad = self.pad_mode == 'pad'

    if check_bool(has_bias):
        self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias')

    # The primitive's out_channel is this layer's in_channels (see the note above).
    self.conv2d_transpose = P.Conv2DBackpropInput(
        out_channel=in_channels,
        kernel_size=kernel_2d,
        mode=1,
        pad_mode=pad_mode,
        pad=self.padding,
        stride=stride_2d,
        dilation=dilation_2d,
        group=group)
    self.bias_add = P.BiasAdd()
    self.expand_dims = P.ExpandDims()
    self.squeeze = P.Squeeze(2)
def __init__(
        self,
        out_channels=256,
        layers=20,
        stacks=2,
        residual_channels=512,
        gate_channels=512,
        skip_out_channels=512,
        kernel_size=3,
        dropout=1 - 0.95,
        cin_channels=-1,
        gin_channels=-1,
        n_speakers=None,
        upsample_conditional_features=False,
        upsample_net="ConvInUpsampleNetwork",
        upsample_params=None,
        scalar_input=False,
        use_speaker_embedding=False,
        output_distribution="Logistic",
        cin_pad=0,
):
    """Build the WaveNet sub-cells and the primitives used by the network.

    Args:
        out_channels: Output channels of the last 1x1 convolution; also the input
            channels of the first convolution unless ``scalar_input`` is set.
        layers: Number of ``ResidualConv1dGLU`` layers; must be divisible by ``stacks``.
        stacks: Number of dilation cycles. Layer ``i`` uses dilation
            ``2 ** (i % (layers // stacks))``.
        residual_channels: Output channels of the first convolution and first
            argument of every residual layer.
        gate_channels: Forwarded to every residual layer.
        skip_out_channels: Forwarded to every residual layer; also the channel
            width of the output head.
        kernel_size: Forwarded to every residual layer.
        dropout: Forwarded to every residual layer.
        cin_channels: Forwarded to every residual layer and stored on the instance.
        gin_channels: Forwarded to every residual layer; a speaker embedding is only
            created when it is positive and ``use_speaker_embedding`` is true.
        n_speakers: Number of embedding rows; required when a speaker embedding
            is created.
        upsample_conditional_features: When true, ``self.upsample_net`` is built,
            otherwise it is ``None``.
        upsample_net: Attribute name looked up on the ``upsample`` module.
        upsample_params: Keyword arguments for the upsample network; ``None`` is
            treated as "no keyword arguments".
        scalar_input: When true the first convolution takes a single input channel.
        use_speaker_embedding: See ``gin_channels``.
        output_distribution: Stored on the instance.
        cin_pad: Accepted but not used by this initializer.

    Raises:
        AssertionError: If ``layers`` is not divisible by ``stacks``, or a speaker
            embedding is requested while ``n_speakers`` is ``None``.
    """
    super(WaveNet, self).__init__()

    # Primitives used by the network.
    self.transpose_op = P.Transpose()
    self.softmax = P.Softmax(axis=1)
    self.reshape_op = P.Reshape()
    self.zeros_op = P.Zeros()
    self.ones_op = P.Ones()
    self.relu_op = P.ReLU()
    self.squeeze_op = P.Squeeze()
    self.expandim_op = P.ExpandDims()
    self.tile_op = P.Tile()
    self.fack_data = P.Zeros()

    self.scalar_input = scalar_input
    self.out_channels = out_channels
    self.cin_channels = cin_channels
    self.output_distribution = output_distribution

    assert layers % stacks == 0, \
        "layers ({}) must be divisible by stacks ({})".format(layers, stacks)
    layers_per_stack = layers // stacks

    first_in_channels = 1 if scalar_input else out_channels
    self.first_conv = Conv1d1x1(first_in_channels, residual_channels)

    # The dilation doubles inside a stack and restarts at 1 for the next stack.
    self.conv_layers = nn.CellList([
        ResidualConv1dGLU(residual_channels,
                          gate_channels,
                          kernel_size=kernel_size,
                          skip_out_channels=skip_out_channels,
                          bias=True,
                          dropout=dropout,
                          dilation=2 ** (layer % layers_per_stack),
                          cin_channels=cin_channels,
                          gin_channels=gin_channels)
        for layer in range(layers)
    ])

    self.last_conv_layers = nn.CellList([
        nn.ReLU(),
        Conv1d1x1(skip_out_channels, skip_out_channels),
        nn.ReLU(),
        Conv1d1x1(skip_out_channels, out_channels),
    ])

    if gin_channels > 0 and use_speaker_embedding:
        assert n_speakers is not None, \
            "n_speakers is required when use_speaker_embedding is set and gin_channels > 0"
        self.embed_speakers = Embedding(n_speakers, gin_channels, padding_idx=None, std=0.1)
    else:
        self.embed_speakers = None

    if upsample_conditional_features:
        # Guard the None default: `**None` would fail before the network's own
        # argument checking gets a chance to report what is missing.
        upsample_kwargs = {} if upsample_params is None else upsample_params
        self.upsample_net = getattr(upsample, upsample_net)(**upsample_kwargs)
    else:
        self.upsample_net = None

    self.factor = math.sqrt(1.0 / len(self.conv_layers))