def __init__(self, config, is_training=True, use_one_hot_embeddings=False):
    super(GPT2SummarizationModel, self).__init__()
    self.gpt2 = GPT2Model(config, is_training, use_one_hot_embeddings)
    self.lm_head = nn.Dense(config.d_model,
                            config.vocab_size,
                            has_bias=False,
                            weight_init=TruncatedNormal(sigma=config.initializer_range))
    self.reshape = P.Reshape()
    # Note: despite its name, self.softmax is also a LogSoftmax primitive.
    self.softmax = P.LogSoftmax(axis=-1)
    self.log_softmax = P.LogSoftmax(axis=-1)
    self.batch_size = config.batch_size
    self.vocab_size = config.vocab_size
    self.seq_length = config.seq_length
    self.onehot = P.OneHot()
    self.print = P.Print()
def __init__(self, config, is_training=True):
    super(Decoder, self).__init__()
    self.hidden_size = config.hidden_size
    self.vocab_size = config.trg_vocab_size
    self.embedding_size = config.decoder_embedding_size
    self.embedding = nn.Embedding(self.vocab_size, self.embedding_size)
    self.rnn = GRU(input_size=self.embedding_size + self.hidden_size * 2,
                   hidden_size=self.hidden_size).to_float(config.compute_type)
    self.text_len = config.max_length
    self.shape = P.Shape()
    self.transpose = P.Transpose()
    self.p = P.Print()
    self.cast = P.Cast()
    self.concat = P.Concat(axis=2)
    self.squeeze = P.Squeeze(axis=0)
    self.expandims = P.ExpandDims()
    self.log_softmax = P.LogSoftmax(axis=1)
    weight, bias = dense_default_state(self.embedding_size + self.hidden_size * 3,
                                       self.vocab_size)
    self.fc = nn.Dense(self.embedding_size + self.hidden_size * 3,
                       self.vocab_size,
                       weight_init=weight,
                       bias_init=bias).to_float(config.compute_type)
    self.attention = Attention(config)
    self.bmm = P.BatchMatMul()
    self.dropout = nn.Dropout(0.7)  # keep_prob=0.7, i.e. drop rate 0.3
    self.dtype = config.dtype
def __init__(self, config):
    super(GPT_Head, self).__init__()
    self.matmul = P.MatMul(transpose_b=True)
    self.embedding_size = config.embedding_size
    self.log_softmax = P.LogSoftmax(axis=-1)
    self.dtype = config.compute_dtype
    self.cast = P.Cast()
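# Only the constructor is shown above. A minimal sketch of a matching
# construct method, assuming (as in similar GPT heads) that the final hidden
# state is projected onto the vocabulary with the tied embedding table; the
# argument names and shapes here are assumptions, not taken from the source,
# and `mstype` is assumed imported as mindspore.common.dtype.
def construct(self, state, embedding_table):
    # state: [batch * seq_length, embedding_size]
    # embedding_table: [vocab_size, embedding_size]; transpose_b=True gives
    # logits of shape [batch * seq_length, vocab_size].
    logits = self.matmul(self.cast(state, self.dtype),
                         self.cast(embedding_table, self.dtype))
    return self.log_softmax(self.cast(logits, mstype.float32))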
def __init__(self, hidden_size, output_size, max_length, dropout_p=0.1):
    super(AttnDecoderRNN, self).__init__()
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.dropout_p = dropout_p
    self.max_length = max_length
    self.embedding = nn.Embedding(self.output_size, self.hidden_size)
    self.attn = nn.Dense(in_channels=self.hidden_size * 2,
                         out_channels=self.max_length).to_float(mstype.float16)
    self.attn_combine = nn.Dense(in_channels=self.hidden_size * 2,
                                 out_channels=self.hidden_size).to_float(mstype.float16)
    self.dropout = nn.Dropout(keep_prob=1.0 - self.dropout_p)
    self.gru = GRU(hidden_size, hidden_size).to_float(mstype.float16)
    self.out = nn.Dense(in_channels=self.hidden_size,
                        out_channels=self.output_size).to_float(mstype.float16)
    self.transpose = P.Transpose()
    self.concat = P.Concat(axis=2)
    self.concat1 = P.Concat(axis=1)
    self.softmax = P.Softmax(axis=1)
    self.relu = P.ReLU()
    self.log_softmax = P.LogSoftmax(axis=1)
    self.bmm = P.BatchMatMul()
    self.unsqueeze = P.ExpandDims()
    self.squeeze = P.Squeeze(1)
    self.squeeze1 = P.Squeeze(0)
    self.cast = P.Cast()
def __init__(self, config, is_training, num_labels=2, dropout_prob=0.0,
             use_one_hot_embeddings=False, phase_type="student"):
    super(BertModelCLS, self).__init__()
    self.bert = BertModel(config, is_training, use_one_hot_embeddings)
    self.cast = P.Cast()
    self.weight_init = TruncatedNormal(config.initializer_range)
    self.log_softmax = P.LogSoftmax(axis=-1)
    self.dtype = config.dtype
    self.num_labels = num_labels
    self.phase_type = phase_type
    if self.phase_type == "teacher":
        self.dense = nn.Dense(config.hidden_size,
                              self.num_labels,
                              weight_init=self.weight_init,
                              has_bias=True).to_float(config.compute_type)
    else:
        self.dense_1 = nn.Dense(config.hidden_size,
                                self.num_labels,
                                weight_init=self.weight_init,
                                has_bias=True).to_float(config.compute_type)
    # Note: despite the attribute name, this applies ReLU, not dropout.
    self.dropout = nn.ReLU()
def __init__(self, config, is_training, num_labels=11, dropout_prob=0.0,
             use_one_hot_embeddings=False, phase_type="student"):
    super(BertModelNER, self).__init__()
    if not is_training:
        config.hidden_dropout_prob = 0.0
        config.hidden_probs_dropout_prob = 0.0
    self.bert = BertModel(config, is_training, use_one_hot_embeddings)
    self.cast = P.Cast()
    self.weight_init = TruncatedNormal(config.initializer_range)
    self.log_softmax = P.LogSoftmax(axis=-1)
    self.dtype = config.dtype
    self.num_labels = num_labels
    self.dense_1 = nn.Dense(config.hidden_size,
                            self.num_labels,
                            weight_init=self.weight_init,
                            has_bias=True).to_float(config.compute_type)
    # Note: despite the attribute name, this applies ReLU, not dropout.
    self.dropout = nn.ReLU()
    self.reshape = P.Reshape()
    self.shape = (-1, config.hidden_size)
    self.origin_shape = (-1, config.seq_length, self.num_labels)
def __init__(self, config, is_training, use_one_hot_embeddings=False): """ Args: config: the configuration of GPT-2 model is_training (bool): `True` for train (finetune), `False` for evaluation. use_one_hot_embeddings (bool): default False. """ super(GPT2CBTModel, self).__init__() if not is_training: config.summary_first_dropout = 0.0 self.is_training = is_training self.d_model = config.d_model self.batch_size = config.batch_size self.seq_length = config.seq_length self.vocab_size = config.vocab_size self.gpt2 = GPT2Model(config, is_training, use_one_hot_embeddings) self.cast = P.Cast() self.shape = P.Shape() self.reshape = P.Reshape() self.log_softmax = P.LogSoftmax(axis=-1) self.dtype = config.dtype self.lm_head = nn.Dense(config.d_model, config.vocab_size, weight_init=TruncatedNormal( config.initializer_range), has_bias=False).to_float(config.compute_type) self.first_dropout = nn.Dropout(1 - config.summary_first_dropout)
def __init__(self, strategy1, strategy2, strategy3, strategy4, strategy5, strategy6):
    super().__init__()
    self.matmul1 = P.MatMul().set_strategy(strategy1)
    self.matmul2 = P.MatMul().set_strategy(strategy2)
    self.gelu = P.Gelu().set_strategy(strategy3)
    self.tanh = P.Tanh().set_strategy(strategy4)
    self.softmax = P.Softmax(axis=(0, 1)).set_strategy(strategy5)
    self.logsoftmax = P.LogSoftmax().set_strategy(strategy6)
def __init__(self, strategy1, strategy2, strategy3):
    super().__init__()
    self.matmul1 = P.MatMul().shard(strategy1)
    self.matmul2 = P.MatMul().shard(strategy2)
    self.gelu = P.Gelu().shard(strategy3)
    self.tanh = P.Tanh().shard(strategy3)
    self.softmax = P.Softmax().shard(strategy3)
    self.logsoftmax = P.LogSoftmax().shard(strategy3)
def __init__(self):
    super(SoftmaxCrossEntropyWithLogits, self).__init__()
    self.log_softmax = P.LogSoftmax()
    self.neg = P.Neg()
    self.one_hot = P.OneHot()
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.0, mstype.float32)
    self.reduce_sum = P.ReduceSum()
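# A hedged sketch of the matching construct method (argument names are
# assumptions, not taken from the source). The operators above compose into
# the standard cross-entropy: loss_i = -sum_c y_ic * log_softmax(logits)_ic.
def construct(self, logits, label):
    log_probs = self.log_softmax(logits)
    # One-hot encode the labels so the sum selects the true-class log-prob.
    one_hot_label = self.one_hot(label, P.Shape()(logits)[-1],
                                 self.on_value, self.off_value)
    loss = self.neg(self.reduce_sum(one_hot_label * log_probs, -1))
    return loss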
def __init__(self, config):
    super(GetNextSentenceOutput, self).__init__()
    self.log_softmax = P.LogSoftmax()
    weight_init = TruncatedNormal(config.initializer_range)
    self.dense = nn.Dense(config.hidden_size, 2,
                          weight_init=weight_init,
                          has_bias=True).to_float(config.compute_type)
    self.dtype = config.dtype
    self.cast = P.Cast()
def __init__(self, probs=None, seed=None, dtype=mstype.int32, name="Categorical"):
    param = dict(locals())
    param['param_dict'] = {'probs': probs}
    valid_dtype = mstype.uint_type + mstype.int_type + mstype.float_type
    Validator.check_type_name("dtype", dtype, valid_dtype, type(self).__name__)
    super(Categorical, self).__init__(seed, dtype, name, param)

    self._probs = self._add_parameter(probs, 'probs')
    if self.probs is not None:
        check_rank(self.probs)
        check_prob(self.probs)
        check_sum_equal_one(probs)

        # Update is_scalar_batch and broadcast_shape:
        # drop one dimension.
        if self.probs.shape[:-1] == ():
            self._is_scalar_batch = True
            self._broadcast_shape = self._broadcast_shape[:-1]

    self.argmax = P.ArgMaxWithValue(axis=-1)
    self.broadcast = broadcast_to
    self.cast = P.Cast()
    self.clip_by_value = C.clip_by_value
    self.concat = P.Concat(-1)
    self.cumsum = P.CumSum()
    self.dtypeop = P.DType()
    self.exp = exp_generic
    self.expand_dim = P.ExpandDims()
    self.fill = P.Fill()
    self.gather = P.GatherNd()
    self.greater = P.Greater()
    self.issubclass = P.IsSubClass()
    self.less = P.Less()
    self.log = log_generic
    self.log_softmax = P.LogSoftmax()
    self.logicor = P.LogicalOr()
    self.logicand = P.LogicalAnd()
    self.multinomial = P.Multinomial(seed=self.seed)
    self.reshape = P.Reshape()
    self.reduce_sum = P.ReduceSum(keep_dims=True)
    self.select = P.Select()
    self.shape = P.Shape()
    self.softmax = P.Softmax()
    self.squeeze = P.Squeeze()
    self.squeeze_first_axis = P.Squeeze(0)
    self.squeeze_last_axis = P.Squeeze(-1)
    self.square = P.Square()
    self.transpose = P.Transpose()
    self.is_nan = P.IsNan()
    self.index_type = mstype.int32
    self.nan = np.nan
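# Usage sketch for this distribution; the probability values below are
# illustrative, not taken from the original source.
import numpy as np
import mindspore.common.dtype as mstype
import mindspore.nn.probability.distribution as msd
from mindspore import Tensor

# A categorical distribution over three classes; probs must sum to one.
dist = msd.Categorical(probs=Tensor(np.array([0.2, 0.7, 0.1], np.float32)),
                       dtype=mstype.int32)
samples = dist.sample((4,))                  # four indices drawn from {0, 1, 2}
log_p = dist.log_prob(Tensor(1, mstype.int32))  # log-probability of class 1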
def __init__(self, config, is_training, use_one_hot_embeddings=False):
    super(GPT2CoQA, self).__init__()
    self.gpt2 = GPT2CoQAModel(config, is_training, use_one_hot_embeddings)
    self.loss = CrossEntropyCalculation(is_training=is_training)
    self.is_training = is_training
    self.num_labels = config.vocab_size
    self.log_softmax = P.LogSoftmax(axis=-1)
    self.label_indices = Tensor(np.arange(1, config.seq_length), mindspore.int32)
def __init__(self, config):
    super(PANGUALPHA_Head, self).__init__()
    if config.word_emb_dp:
        self.matmul = P.MatMul(transpose_b=True).shard(
            ((config.dp, 1), (1, 1)))
    else:
        self.matmul = P.MatMul(transpose_b=True).shard(
            ((config.dp, 1), (config.mp, 1)))
    self.embedding_size = config.embedding_size
    self.log_softmax = P.LogSoftmax(axis=-1)
    self.dtype = config.compute_dtype
    self.cast = P.Cast()
def __init__(self, config, is_training, use_one_hot_embeddings=False):
    super(GPT2Lambada, self).__init__()
    self.gpt2 = GPT2LambadaModel(config, is_training, use_one_hot_embeddings)
    self.num_labels = config.vocab_size
    self.loss = CrossEntropyCalculationWithMask(is_training=is_training,
                                                num_labels=self.num_labels,
                                                config=config)
    self.is_training = is_training
    self.log_softmax = P.LogSoftmax(axis=-1)
    self.reshape = P.Reshape()
    self.shape = P.Shape()
    self.cast = P.Cast()
def __init__(self, config, is_training, num_labels=2, dropout_prob=0.0,
             use_one_hot_embeddings=False):
    super(BertSquadModel, self).__init__()
    self.bert = BertModel(config, is_training, use_one_hot_embeddings)
    self.weight_init = TruncatedNormal(config.initializer_range)
    self.dense1 = nn.Dense(config.hidden_size, num_labels,
                           weight_init=self.weight_init,
                           has_bias=True).to_float(config.compute_type)
    self.num_labels = num_labels
    self.dtype = config.dtype
    self.log_softmax = P.LogSoftmax(axis=1)
    self.is_training = is_training
def __init__(self, config, is_training, num_labels=2, dropout_prob=0.0,
             use_one_hot_embeddings=False):
    super(BertRegressionModel, self).__init__()
    self.bert = BertModel(config, is_training, use_one_hot_embeddings)
    self.cast = P.Cast()
    self.weight_init = TruncatedNormal(config.initializer_range)
    self.log_softmax = P.LogSoftmax(axis=-1)
    self.dtype = config.dtype
    self.num_labels = num_labels
    self.dropout = nn.Dropout(1 - dropout_prob)
    self.dense_1 = nn.Dense(config.hidden_size, 1,
                            weight_init=self.weight_init,
                            has_bias=True).to_float(mstype.float16)
def __init__(self, config, is_training, use_one_hot_embeddings=False):
    super(GPT2CoQAModel, self).__init__()
    self.gpt2 = GPT2Model(config, is_training, use_one_hot_embeddings)
    self.weight_init = TruncatedNormal(config.initializer_range)
    self.dense1 = nn.Dense(config.d_model, config.vocab_size,
                           weight_init=self.weight_init,
                           has_bias=True).to_float(config.compute_type)
    self.log_softmax = P.LogSoftmax(axis=-1)
    self.vocab_size = config.vocab_size
    self.dtype = config.dtype
def __init__(self, config, is_training, use_one_hot_embeddings=False):
    super(GPT2Translation, self).__init__()
    self.gpt2 = GPT2TranslationModel(config, is_training, use_one_hot_embeddings)
    self.num_labels = config.vocab_size
    self.loss = CrossEntropyCalculationWithMask(is_training=is_training,
                                                num_labels=self.num_labels,
                                                config=config)
    self.is_training = is_training
    self.log_softmax = P.LogSoftmax(axis=-1)
    self.reshape = P.Reshape()
    self.shape = P.Shape()
    self.gather = P.GatherV2()
    # Index vectors covering positions [0, seq_length - 2] and
    # [1, seq_length - 1], i.e. a one-token shift.
    self.indices1 = Tensor(np.arange(config.seq_length - 1), mindspore.int32)
    self.indices2 = Tensor(np.arange(1, config.seq_length), mindspore.int32)
def test_logsoftmax():
    x = np.array([[-0.08082921, -0.13706027, -0.4711177, -0.05606057],
                  [-0.46082982, 1.1761844, -1.016654, -1.743829],
                  [-1.5062045, 0.6910976, 0.4839723, 1.1502692]]).astype(np.float32)
    expect = np.array([[-1.2939762, -1.3502073, -1.6842647, -1.2692076],
                       [-1.9445671, -0.3075528, -2.5003912, -3.2275662],
                       [-3.452001, -1.2546989, -1.4618242, -0.79552734]]).astype(np.float32)

    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    log_softmax = P.LogSoftmax()
    output = log_softmax(Tensor(x))
    assert np.allclose(output.asnumpy(), expect)
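# Reference check (a sketch): LogSoftmax computes
# log_softmax(x) = x - log(sum(exp(x))) along the chosen axis, so the
# `expect` values above can be reproduced in plain NumPy.
def np_log_softmax(x, axis=-1):
    shifted = x - x.max(axis=axis, keepdims=True)  # stabilize before exp
    return shifted - np.log(np.exp(shifted).sum(axis=axis, keepdims=True))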
def __init__(self, config, is_training, use_one_hot_embeddings=False, num_labels=10):
    super(GPT2CBT, self).__init__()
    self.gpt2 = GPT2CBTModel(config, is_training, use_one_hot_embeddings,
                             num_labels=num_labels)
    self.loss1 = CrossEntropyCalculation(is_training=is_training)
    self.loss2 = CrossEntropyCalculation(is_training=is_training)
    self.mc_num_labels = num_labels
    self.lm_num_labels = config.vocab_size
    self.is_training = is_training
    self.log_softmax = P.LogSoftmax(axis=-1)
    self.shape = P.Shape()
    self.reshape = P.Reshape()
    self.cast = P.Cast()
def validate_model(net, dataset):
    accs = []
    for data in dataset.create_dict_iterator():
        train_x = Tensor(data['image'].astype(np.float32))
        label = Tensor(data['label'].astype(np.int32))
        output = net(train_x)
        log_output = P.LogSoftmax(axis=1)(output)
        acc = np.mean(log_output.asnumpy().argmax(axis=1) == label.asnumpy())
        accs.append(acc)
    acc_mean = np.mean(accs)
    return acc_mean
def __init__(self, config=None, is_training=None, use_one_hot_embeddings=False):
    super(GPT2Summarization, self).__init__()
    self.gpt2 = GPT2SummarizationModel(config, is_training, use_one_hot_embeddings)
    self.is_training = is_training
    self.last_idx = (-1,)
    self.log_softmax = P.LogSoftmax(axis=-1)
    self.reshape = P.Reshape()
    self.shape = P.Shape()
    self.batch_size = config.batch_size
    self.seq_length = config.seq_length
    self.vocab_size = config.vocab_size
    self.cast = P.Cast()
    self.loss_function = CrossEntropyCalculationWithMask(num_labels=self.vocab_size,
                                                         is_training=self.is_training,
                                                         config=config)
def __init__(self, config, is_training, num_labels=2, dropout_prob=0.0,
             use_one_hot_embeddings=False, assessment_method=""):
    super(BertCLSModel, self).__init__()
    if not is_training:
        config.hidden_dropout_prob = 0.0
        config.hidden_probs_dropout_prob = 0.0
    self.bert = BertModel(config, is_training, use_one_hot_embeddings)
    self.cast = P.Cast()
    self.weight_init = TruncatedNormal(config.initializer_range)
    self.log_softmax = P.LogSoftmax(axis=-1)
    self.dtype = config.dtype
    self.num_labels = num_labels
    self.dense_1 = nn.Dense(config.hidden_size, self.num_labels,
                            weight_init=self.weight_init,
                            has_bias=True).to_float(config.compute_type)
    self.dropout = nn.Dropout(1 - dropout_prob)
    self.assessment_method = assessment_method
def train_model(train_net, net, dataset):
    accs = []
    loss_sum = 0
    for data in dataset.create_dict_iterator():
        train_x = Tensor(data['image'].astype(np.float32))
        label = Tensor(data['label'].astype(np.int32))
        loss = train_net(train_x, label)
        output = net(train_x)
        log_output = P.LogSoftmax(axis=1)(output)
        acc = np.mean(log_output.asnumpy().argmax(axis=1) == label.asnumpy())
        accs.append(acc)
        loss_sum += loss.asnumpy()
    loss_mean = loss_sum / len(accs)
    acc_mean = np.mean(accs)
    return loss_mean, acc_mean
def __init__(self, config, is_training, use_one_hot_embeddings=False): """ Args: config: the configuration of GPT-2 model is_training (bool): `True` for train (finetune), `False` for evaluation. use_one_hot_embeddings (bool): default False. """ super(GPT2LambadaModel, self).__init__() if not is_training: config.hidden_dropout = 0.0 self.vocab_size = config.vocab_size self.gpt2 = GPT2Model(config, is_training, use_one_hot_embeddings) self.cast = P.Cast() self.shape = P.Shape() self.log_softmax = P.LogSoftmax(axis=-1) self.dtype = config.dtype self.dense1 = nn.Dense(config.d_model, config.vocab_size, weight_init=TruncatedNormal( config.initializer_range)).to_float( mstype.float16) self.dropout = nn.Dropout(1 - config.hidden_dropout)
def __init__(self, config, is_training, use_one_hot_embeddings=False):
    super(GPT2LanguageModel, self).__init__()
    if not is_training:
        config.hidden_dropout = 0.0
    self.gpt2 = GPT2Model(config, is_training, use_one_hot_embeddings)
    self.vocab_size = config.vocab_size
    self.cast = P.Cast()
    self.shape = P.Shape()
    self.log_softmax = P.LogSoftmax(axis=-1)
    self.dtype = config.dtype
    self.dense1 = nn.Dense(config.d_model, config.vocab_size,
                           weight_init=TruncatedNormal(config.initializer_range),
                           has_bias=True).to_float(config.compute_type)
    self.dropout = nn.Dropout(1 - config.hidden_dropout)
def __init__(self, strategy1, strategy2, strategy3):
    super().__init__()
    self.matmul1 = P.MatMul().shard(strategy1)
    self.matmul2 = P.MatMul().shard(strategy2)
    self.logsoftmax = P.LogSoftmax().shard(strategy3)
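# For context: shard takes one partition tuple per operator input, so a
# MatMul strategy holds two tuples and a LogSoftmax strategy holds one.
# An illustrative instantiation for 8 devices; the tuple values and the
# class name `Net` are assumptions, not taken from the original tests.
strategy1 = ((2, 4), (4, 1))  # split MatMul's two inputs across 8 devices
strategy2 = ((2, 4), (4, 1))
strategy3 = ((8, 1),)         # split LogSoftmax by rows; keep the softmax axis whole
net = Net(strategy1, strategy2, strategy3)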
def __init__(self, axis=-1):
    super(LogSoftmax, self).__init__()
    self.log_softmax = P.LogSoftmax(axis)
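# The matching forward is a single delegation to the primitive; a minimal
# sketch following the standard nn.Cell pattern:
def construct(self, x):
    return self.log_softmax(x)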
    'skip': ['backward']}),
('MaxPoolWithArgmax', {
    'block': P.MaxPoolWithArgmax(ksize=2, strides=2),
    'desc_inputs': [[128, 32, 32, 64]],
    'desc_bprop': [[128, 32, 8, 16], [128, 32, 8, 16]]}),
('SoftmaxCrossEntropyWithLogits', {
    'block': P.SoftmaxCrossEntropyWithLogits(),
    'desc_inputs': [[1, 10], [1, 10]],
    'desc_bprop': [[1], [1, 10]],
    'skip': ['backward_exec']}),
('Flatten', {
    'block': P.Flatten(),
    'desc_inputs': [[128, 32, 32, 64]],
    'desc_bprop': [[128 * 32 * 8 * 16]]}),
('LogSoftmax', {
    'block': P.LogSoftmax(),
    'desc_inputs': [[64, 2]],
    'desc_bprop': [[160, 30522]]}),
('LogSoftmaxGrad', {
    'block': G.LogSoftmaxGrad(),
    'desc_inputs': [[16, 1234], [16, 1234]],
    'desc_bprop': [[64, 2]],
    'skip': ['backward']}),
('LayerNorm', {
    'block': P.LayerNorm(),
    'desc_inputs': [[2, 16], [16], [16]],
    'desc_bprop': [[2, 16], [2, 16], [2, 16]]}),
('LayerNormGrad', {
    'block': G.LayerNormGrad(),
    'desc_inputs': [[2, 16], [2, 16], [2, 16], [2, 16], [16]],
    'desc_bprop': [[2, 16], [16], [16]],