def __init__(self, config):
    super(GetMaskedLMOutput, self).__init__()
    self.width = config.hidden_size
    self.reshape = P.Reshape()
    self.gather = P.GatherV2()
    weight_init = TruncatedNormal(config.initializer_range)
    # damping, loss_scale, frequency and batch_size are THOR-specific
    # hyperparameters defined elsewhere in the module (from the THOR config).
    self.dense = Dense_Thor(in_channels=self.width,
                            out_channels=config.hidden_size,
                            weight_init=weight_init,
                            has_bias=True,
                            bias_init='zeros',
                            damping=damping,
                            loss_scale=loss_scale,
                            frequency=frequency,
                            activation=config.hidden_act,
                            batch_size=batch_size).to_float(config.compute_type)
    self.layernorm = nn.LayerNorm((config.hidden_size,)).to_float(config.compute_type)
    self.output_bias = Parameter(initializer('zero', config.vocab_size),
                                 name='output_bias')
    self.matmul = P.MatMul(transpose_b=True)
    self.log_softmax = nn.LogSoftmax(axis=-1)
    self.shape_flat_offsets = (-1, 1)
    self.rng = Tensor(np.array(range(0, config.batch_size)).astype(np.int32))
    self.last_idx = (-1,)
    self.shape_flat_sequence_tensor = (config.batch_size * config.seq_length, self.width)
    self.seq_length_tensor = Tensor(np.array((config.seq_length,)).astype(np.int32))
    self.cast = P.Cast()
    self.compute_type = config.compute_type
    self.dtype = config.dtype

def __init__(self, config):
    super(GetMaskedLMOutput, self).__init__()
    self.width = config.hidden_size
    self.reshape = P.Reshape()
    self.gather = P.Gather()
    weight_init = TruncatedNormal(config.initializer_range)
    self.dense = nn.Dense(self.width,
                          config.hidden_size,
                          weight_init=weight_init,
                          activation=config.hidden_act).to_float(config.compute_type)
    self.layernorm = nn.LayerNorm((config.hidden_size,)).to_float(config.compute_type)
    self.output_bias = Parameter(initializer('zero', config.vocab_size))
    self.matmul = P.MatMul(transpose_b=True)
    self.log_softmax = nn.LogSoftmax(axis=-1)
    self.shape_flat_offsets = (-1, 1)
    self.last_idx = (-1,)
    self.shape_flat_sequence_tensor = (-1, self.width)
    self.seq_length_tensor = Tensor(np.array((config.seq_length,)).astype(np.int32))
    self.cast = P.Cast()
    self.compute_type = config.compute_type
    self.dtype = config.dtype

def __init__(self, config):
    super(CrossEntropyLoss, self).__init__()
    self.log_softmax = nn.LogSoftmax(axis=-1)
    self.mean = P.ReduceMean()
    self.sum = P.ReduceSum()
    self.onehot = P.OneHot()
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.0, mstype.float32)
    self.vocab_size = config.vocab_size

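# The matching construct for this CrossEntropyLoss is not shown above; the
# following is a hedged sketch of how the pieces built in __init__ are typically
# combined in a GPT-style language-model loss. The argument names (logits, label,
# input_mask) and the masked averaging are assumptions for illustration, not the
# original implementation.
def construct(self, logits, label, input_mask):
    # per-token log-probabilities over the vocabulary
    log_probs = self.log_softmax(logits)
    label = P.Reshape()(label, (-1,))
    one_hot_label = self.onehot(label, self.vocab_size, self.on_value, self.off_value)
    # negative log-likelihood of the target token at each position
    per_token_loss = -self.sum(one_hot_label * log_probs, -1)
    # average only over non-padding positions
    input_mask = P.Cast()(P.Reshape()(input_mask, (-1,)), mstype.float32)
    numerator = self.sum(per_token_loss * input_mask)
    denominator = self.sum(input_mask) + 1e-5
    return numerator / denominator
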
def construct(self, input_ids):
    """evaluation net"""
    input_mask = F.cast(F.not_equal(input_ids, 0), mstype.float32)
    logits = self.backbone(input_ids, input_mask)
    outputs = None
    if self.generate:
        # exp(log_softmax(x)) recovers the ordinary softmax probabilities
        outputs = nn.LogSoftmax()(logits)
        outputs = F.tensor_pow(np.e, outputs)
    else:
        outputs = self.argmax(logits)
    return outputs

def __init__(self, config):
    super(PredLogProbs, self).__init__()
    self.width = config.hidden_size
    self.reshape = P.Reshape()
    self.matmul = P.MatMul(transpose_b=True)
    self.log_softmax = nn.LogSoftmax(axis=-1)
    self.shape_flat_sequence_tensor = (config.batch_size * config.seq_length, self.width)
    self.cast = P.Cast()
    self.compute_type = config.compute_type
    self.dtype = config.dtype
    self.get_shape = P.Shape()

def __init__(self, config, is_training=True):
    super(Decoder, self).__init__()
    self.vocab_size = config.ch_vocab_size
    self.hidden_size = config.hidden_size
    self.trans = P.Transpose()
    self.perm = (1, 0, 2)
    self.embedding = nn.Embedding(self.vocab_size, self.hidden_size)
    self.gru = GRU(config, is_training=is_training).to_float(mstype.float16)
    self.dense = nn.Dense(self.hidden_size, self.vocab_size)
    self.softmax = nn.LogSoftmax(axis=2)
    self.cast = P.Cast()

def __init__(self, batch_size, seq_length, width,
             compute_type=mstype.float32, dtype=mstype.float32):
    super(PredLogProbs, self).__init__()
    self.batch_size = batch_size
    self.seq_length = seq_length
    self.width = width
    self.compute_type = compute_type
    self.dtype = dtype
    self.log_softmax = nn.LogSoftmax(axis=-1)
    self.cast = P.Cast()

def __init__(self, batch_size, seq_length, width,
             compute_type=mstype.float32, dtype=mstype.float32):
    super(PredLogProbs, self).__init__()
    self.batch_size = batch_size
    self.seq_length = seq_length
    self.width = width
    self.compute_type = compute_type
    self.dtype = dtype
    self.reshape = P.Reshape()
    self.matmul = P.MatMul(transpose_b=True)
    self.log_softmax = nn.LogSoftmax(axis=-1)
    self.shape_flat_sequence_tensor = (self.batch_size * self.seq_length, self.width)
    self.cast = P.Cast()

def __init__(self, weight=None, gamma=2.0, reduction='mean'):
    super(FocalLoss, self).__init__(reduction=reduction)
    self.gamma = validator.check_value_type("gamma", gamma, [float])
    if weight is not None and not isinstance(weight, Tensor):
        raise TypeError("The type of weight should be Tensor, but got {}.".format(type(weight)))
    if isinstance(weight, Tensor) and weight.ndim != 1:
        raise ValueError("The dimension of weight should be 1, but got {}.".format(weight.ndim))
    self.weight = weight
    self.expand_dims = P.ExpandDims()
    self.gather_d = P.GatherD()
    self.squeeze = P.Squeeze(axis=1)
    self.tile = P.Tile()
    self.cast = P.Cast()
    self.dtype = P.DType()
    self.logsoftmax = nn.LogSoftmax(1)

def __init__(self, vocab_size, embedding_dims, num_class):
    super(FastText, self).__init__()
    self.vocab_size = vocab_size
    self.embeding_dims = embedding_dims
    self.num_class = num_class
    self.embeding_func = nn.Embedding(vocab_size=self.vocab_size,
                                      embedding_size=self.embeding_dims,
                                      padding_idx=0,
                                      embedding_table='Zeros')
    self.fc = nn.Dense(self.embeding_dims,
                       out_channels=self.num_class,
                       weight_init=XavierUniform(1)).to_float(mstype.float16)
    self.reducesum = P.ReduceSum()
    self.expand_dims = P.ExpandDims()
    self.squeeze = P.Squeeze(axis=1)
    self.cast = P.Cast()
    self.tile = P.Tile()
    self.realdiv = P.RealDiv()
    self.fill = P.Fill()
    self.log_softmax = nn.LogSoftmax(axis=1)

def construct(self, predict, target):
    targets = target
    _check_ndim(predict.ndim, targets.ndim)
    _check_channel_and_shape(targets.shape[1], predict.shape[1])
    _check_predict_channel(predict.shape[1])

    if predict.ndim > 2:
        predict = predict.view(predict.shape[0], predict.shape[1], -1)
        targets = targets.view(targets.shape[0], targets.shape[1], -1)
    else:
        predict = self.expand_dims(predict, 2)
        targets = self.expand_dims(targets, 2)

    log_probability = nn.LogSoftmax(1)(predict)

    if target.shape[1] == 1:
        log_probability = self.gather_d(log_probability, 1, self.cast(targets, mindspore.int32))
        log_probability = self.squeeze(log_probability)

    probability = F.exp(log_probability)

    if self.weight is not None:
        convert_weight = self.weight[None, :, None]
        convert_weight = self.tile(convert_weight, (targets.shape[0], 1, targets.shape[2]))
        if target.shape[1] == 1:
            convert_weight = self.gather_d(convert_weight, 1, self.cast(targets, mindspore.int32))
            convert_weight = self.squeeze(convert_weight)
        probability = log_probability * convert_weight

    weight = F.pows(-probability + 1.0, self.gamma)
    if target.shape[1] == 1:
        loss = (-weight * log_probability).mean(axis=1)
    else:
        loss = (-weight * targets * log_probability).mean(axis=-1)
    return self.get_loss(loss)

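# A minimal usage sketch for the FocalLoss cell defined above (its __init__ and
# construct). It assumes MindSpore is installed and the module-level validators
# (_check_ndim and friends) are present; the (batch, num_classes) logits and
# (batch, 1) integer labels are illustrative values, not from the original source.
import numpy as np
import mindspore as ms

logits = ms.Tensor(np.random.randn(4, 3).astype(np.float32))        # (batch, num_classes)
labels = ms.Tensor(np.array([[0], [2], [1], [0]], dtype=np.int32))  # (batch, 1) class indices

loss_fn = FocalLoss(gamma=2.0, reduction='mean')
loss = loss_fn(logits, labels)  # scalar; well-classified samples are down-weighted by (1 - p)**gamma
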
def __init__(self, config, is_training=True, dropout=0.1):
    super(Decoder, self).__init__()
    self.vocab_size = config.ch_vocab_size
    self.hidden_size = config.hidden_size
    self.max_len = config.max_seq_length
    self.trans = P.Transpose()
    self.perm = (1, 0, 2)
    self.embedding = nn.Embedding(self.vocab_size, self.hidden_size)
    self.dropout = nn.Dropout(1 - dropout)
    self.attn = nn.Dense(self.hidden_size, self.max_len)
    self.softmax = nn.Softmax(axis=2)
    self.bmm = P.BatchMatMul()
    self.concat = P.Concat(axis=2)
    self.attn_combine = nn.Dense(self.hidden_size * 2, self.hidden_size)
    self.gru = GRU(config, is_training=is_training).to_float(mstype.float16)
    self.out = nn.Dense(self.hidden_size, self.vocab_size)
    self.logsoftmax = nn.LogSoftmax(axis=2)
    self.cast = P.Cast()

def __init__(self, config):
    super(PredLogProbs, self).__init__()
    self.reshape = P.Reshape()
    self.log_softmax = nn.LogSoftmax(axis=-1)
    self.get_shape = P.Shape()

def __init__(self, network):
    super(FastTextInferCell, self).__init__(auto_prefix=False)
    self.network = network
    self.argmax = P.ArgMaxWithValue(axis=1, keep_dims=True)
    self.log_softmax = nn.LogSoftmax(axis=1)

def __init__(self, dim):
    super(LogSoftmaxNet, self).__init__()
    self.logsoftmax = nn.LogSoftmax(dim)
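
# A minimal sketch (not from the original sources) showing what the nn.LogSoftmax
# cell used throughout the snippets above computes, and how probabilities can be
# recovered from its output, the same trick the evaluation net applies with
# F.tensor_pow(np.e, ...). Values and shapes are illustrative only.
import numpy as np
import mindspore as ms
import mindspore.nn as nn

logits = ms.Tensor(np.array([[2.0, 1.0, 0.1]], dtype=np.float32))
log_probs = nn.LogSoftmax(axis=-1)(logits)   # same shape as logits, values <= 0
probs = np.exp(log_probs.asnumpy())          # recovers softmax; each row sums to 1.0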