def __init__(self, config, is_training=True, use_one_hot_embeddings=False):
     super(GPT2SummarizationModel, self).__init__()
     self.gpt2 = GPT2Model(
         config, is_training, use_one_hot_embeddings)
     self.lm_head = nn.Dense(config.d_model, config.vocab_size, has_bias=False,
                             weight_init=TruncatedNormal(sigma=config.initializer_range))
     self.reshape = P.Reshape()
     self.softmax = P.Softmax(axis=-1)
     self.log_softmax = P.LogSoftmax(axis=-1)
     self.batch_size = config.batch_size
     self.vocab_size = config.vocab_size
     self.seq_length = config.seq_length
     self.onehot = P.OneHot()
     self.print = P.Print()
Example #2
 def __init__(self, config, is_training=True):
     super(Decoder, self).__init__()
     self.hidden_size = config.hidden_size
     self.vocab_size = config.trg_vocab_size
     self.embedding_size = config.decoder_embedding_size
     self.embedding = nn.Embedding(self.vocab_size, self.embedding_size)
     self.rnn = GRU(input_size=self.embedding_size + self.hidden_size*2,
         hidden_size=self.hidden_size).to_float(config.compute_type)
     self.text_len = config.max_length
     self.shape = P.Shape()
     self.transpose = P.Transpose()
     self.p = P.Print()
     self.cast = P.Cast()
     self.concat = P.Concat(axis=2)
     self.squeeze = P.Squeeze(axis=0)
     self.expandims = P.ExpandDims()
     self.log_softmax = P.LogSoftmax(axis=1)
     weight, bias = dense_default_state(
         self.embedding_size + self.hidden_size * 3, self.vocab_size)
     self.fc = nn.Dense(self.embedding_size + self.hidden_size * 3,
                        self.vocab_size,
                        weight_init=weight,
                        bias_init=bias).to_float(config.compute_type)
     self.attention = Attention(config)
     self.bmm = P.BatchMatMul()
     self.dropout = nn.Dropout(0.7)
     self.dtype = config.dtype
Example #3
 def __init__(self, config):
     super(GPT_Head, self).__init__()
     self.matmul = P.MatMul(transpose_b=True)
     self.embedding_size = config.embedding_size
     self.log_softmax = P.LogSoftmax(axis=-1)
     self.dtype = config.compute_dtype
     self.cast = P.Cast()
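For context, a head of this form is typically paired with a `construct` method that projects the transformer's hidden states onto the vocabulary through the (tied) embedding table. The following sketch is an assumption based only on the attributes registered above, not code from the original source; it expects a 2-D `state` of shape (batch*seq_length, embedding_size):

 def construct(self, state, embedding_table):
     # Cast both operands to the configured compute dtype before the projection.
     state = self.cast(state, self.dtype)
     # transpose_b=True multiplies by the transpose of the embedding table,
     # giving logits of shape (batch*seq_length, vocab_size).
     logits = self.matmul(state, self.cast(embedding_table, self.dtype))
     # Return per-token log-probabilities over the vocabulary.
     return self.log_softmax(logits)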
Example #4
 def __init__(self, hidden_size, output_size, max_length, dropout_p=0.1):
     super(AttnDecoderRNN, self).__init__()
     self.hidden_size = hidden_size
     self.output_size = output_size
     self.dropout_p = dropout_p
     self.max_length = max_length
     self.embedding = nn.Embedding(self.output_size, self.hidden_size)
     self.attn = nn.Dense(in_channels=self.hidden_size * 2,
                          out_channels=self.max_length).to_float(
                              mstype.float16)
     self.attn_combine = nn.Dense(in_channels=self.hidden_size * 2,
                                  out_channels=self.hidden_size).to_float(
                                      mstype.float16)
     self.dropout = nn.Dropout(keep_prob=1.0 - self.dropout_p)
     self.gru = GRU(hidden_size, hidden_size).to_float(mstype.float16)
     self.out = nn.Dense(in_channels=self.hidden_size,
                         out_channels=self.output_size).to_float(
                             mstype.float16)
     self.transpose = P.Transpose()
     self.concat = P.Concat(axis=2)
     self.concat1 = P.Concat(axis=1)
     self.softmax = P.Softmax(axis=1)
     self.relu = P.ReLU()
     self.log_softmax = P.LogSoftmax(axis=1)
     self.bmm = P.BatchMatMul()
     self.unsqueeze = P.ExpandDims()
     self.squeeze = P.Squeeze(1)
     self.squeeze1 = P.Squeeze(0)
     self.cast = P.Cast()
Example #5
 def __init__(self,
              config,
              is_training,
              num_labels=2,
              dropout_prob=0.0,
              use_one_hot_embeddings=False,
              phase_type="student"):
     super(BertModelCLS, self).__init__()
     self.bert = BertModel(config, is_training, use_one_hot_embeddings)
     self.cast = P.Cast()
     self.weight_init = TruncatedNormal(config.initializer_range)
     self.log_softmax = P.LogSoftmax(axis=-1)
     self.dtype = config.dtype
     self.num_labels = num_labels
     self.phase_type = phase_type
     if self.phase_type == "teacher":
         self.dense = nn.Dense(config.hidden_size,
                               self.num_labels,
                               weight_init=self.weight_init,
                               has_bias=True).to_float(config.compute_type)
     else:
         self.dense_1 = nn.Dense(config.hidden_size,
                                 self.num_labels,
                                 weight_init=self.weight_init,
                                 has_bias=True).to_float(
                                     config.compute_type)
     self.dropout = nn.Dropout(1 - dropout_prob)
Example #6
 def __init__(self,
              config,
              is_training,
              num_labels=11,
              dropout_prob=0.0,
              use_one_hot_embeddings=False,
              phase_type="student"):
     super(BertModelNER, self).__init__()
     if not is_training:
         config.hidden_dropout_prob = 0.0
         config.hidden_probs_dropout_prob = 0.0
     self.bert = BertModel(config, is_training, use_one_hot_embeddings)
     self.cast = P.Cast()
     self.weight_init = TruncatedNormal(config.initializer_range)
     self.log_softmax = P.LogSoftmax(axis=-1)
     self.dtype = config.dtype
     self.num_labels = num_labels
     self.dense_1 = nn.Dense(config.hidden_size,
                             self.num_labels,
                             weight_init=self.weight_init,
                             has_bias=True).to_float(config.compute_type)
     self.dropout = nn.Dropout(1 - dropout_prob)
     self.reshape = P.Reshape()
     self.shape = (-1, config.hidden_size)
     self.origin_shape = (-1, config.seq_length, self.num_labels)
Example #7
    def __init__(self, config, is_training, use_one_hot_embeddings=False):
        """
        Args:
            config: the configuration of the GPT-2 model.
            is_training (bool): `True` for training (fine-tuning), `False` for evaluation.
            use_one_hot_embeddings (bool): whether to use one-hot token embeddings. Default: False.
        """
        super(GPT2CBTModel, self).__init__()
        if not is_training:
            config.summary_first_dropout = 0.0

        self.is_training = is_training
        self.d_model = config.d_model
        self.batch_size = config.batch_size
        self.seq_length = config.seq_length
        self.vocab_size = config.vocab_size
        self.gpt2 = GPT2Model(config, is_training, use_one_hot_embeddings)
        self.cast = P.Cast()
        self.shape = P.Shape()
        self.reshape = P.Reshape()
        self.log_softmax = P.LogSoftmax(axis=-1)

        self.dtype = config.dtype
        self.lm_head = nn.Dense(config.d_model,
                                config.vocab_size,
                                weight_init=TruncatedNormal(
                                    config.initializer_range),
                                has_bias=False).to_float(config.compute_type)

        self.first_dropout = nn.Dropout(1 - config.summary_first_dropout)
Example #8
 def __init__(self, strategy1, strategy2, strategy3, strategy4, strategy5, strategy6):
     super().__init__()
     self.matmul1 = P.MatMul().set_strategy(strategy1)
     self.matmul2 = P.MatMul().set_strategy(strategy2)
     self.gelu = P.Gelu().set_strategy(strategy3)
     self.tanh = P.Tanh().set_strategy(strategy4)
     self.softmax = P.Softmax(axis=(0, 1)).set_strategy(strategy5)
     self.logsoftmax = P.LogSoftmax().set_strategy(strategy6)
Example #9
 def __init__(self, strategy1, strategy2, strategy3):
     super().__init__()
     self.matmul1 = P.MatMul().shard(strategy1)
     self.matmul2 = P.MatMul().shard(strategy2)
     self.gelu = P.Gelu().shard(strategy3)
     self.tanh = P.Tanh().shard(strategy3)
     self.softmax = P.Softmax().shard(strategy3)
     self.logsoftmax = P.LogSoftmax().shard(strategy3)
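For reference, each argument passed to `shard` is a tuple with one partition tuple per operator input, and each partition tuple has one slice count per tensor dimension. A minimal usage sketch, assuming an 8-device setup and assuming the surrounding class is named `Net` (the class name is not shown in the snippet):

# Hypothetical instantiation for 8 devices; the strategies are illustrative.
net = Net(strategy1=((2, 4), (4, 1)),   # MatMul: split A 2x4, B 4x1 (k-axis splits match)
          strategy2=((2, 1), (1, 4)),   # MatMul: split A 2x1, B 1x4
          strategy3=((8, 1),))          # element-wise ops: data-parallel over 8 devices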
Example #10
 def __init__(self):
     super(SoftmaxCrossEntropyWithLogits, self).__init__()
     self.log_softmax = P.LogSoftmax()
     self.neg = P.Neg()
     self.one_hot = P.OneHot()
     self.on_value = Tensor(1.0, mstype.float32)
     self.off_value = Tensor(0.0, mstype.float32)
     self.reduce_sum = P.ReduceSum()
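The operators registered above compose into the standard cross-entropy. A hedged sketch of the matching `construct` (the exact body is an assumption, not taken from the source; `F` refers to `mindspore.ops.functional`, used here only to read the class-axis size of the logits):

 def construct(self, logits, label):
     # One-hot encode the integer labels to match the class axis of `logits`.
     one_hot_label = self.one_hot(label, F.shape(logits)[1], self.on_value, self.off_value)
     # Cross entropy per sample: -sum(one_hot * log_softmax(logits)) over classes.
     loss = self.neg(self.reduce_sum(one_hot_label * self.log_softmax(logits), 1))
     return loss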
Example #11
 def __init__(self, config):
     super(GetNextSentenceOutput, self).__init__()
     self.log_softmax = P.LogSoftmax()
     weight_init = TruncatedNormal(config.initializer_range)
     self.dense = nn.Dense(config.hidden_size, 2,
                           weight_init=weight_init, has_bias=True).to_float(config.compute_type)
     self.dtype = config.dtype
     self.cast = P.Cast()
Example #12
    def __init__(self,
                 probs=None,
                 seed=None,
                 dtype=mstype.int32,
                 name="Categorical"):
        param = dict(locals())
        param['param_dict'] = {'probs': probs}
        valid_dtype = mstype.uint_type + mstype.int_type + mstype.float_type
        Validator.check_type_name("dtype", dtype, valid_dtype,
                                  type(self).__name__)
        super(Categorical, self).__init__(seed, dtype, name, param)

        self._probs = self._add_parameter(probs, 'probs')
        if self.probs is not None:
            check_rank(self.probs)
            check_prob(self.probs)
            check_sum_equal_one(probs)

            # update is_scalar_batch and broadcast_shape
            # drop one dimension
            if self.probs.shape[:-1] == ():
                self._is_scalar_batch = True
            self._broadcast_shape = self._broadcast_shape[:-1]

        self.argmax = P.ArgMaxWithValue(axis=-1)
        self.broadcast = broadcast_to
        self.cast = P.Cast()
        self.clip_by_value = C.clip_by_value
        self.concat = P.Concat(-1)
        self.cumsum = P.CumSum()
        self.dtypeop = P.DType()
        self.exp = exp_generic
        self.expand_dim = P.ExpandDims()
        self.fill = P.Fill()
        self.gather = P.GatherNd()
        self.greater = P.Greater()
        self.issubclass = P.IsSubClass()
        self.less = P.Less()
        self.log = log_generic
        self.log_softmax = P.LogSoftmax()
        self.logicor = P.LogicalOr()
        self.logicand = P.LogicalAnd()
        self.multinomial = P.Multinomial(seed=self.seed)
        self.reshape = P.Reshape()
        self.reduce_sum = P.ReduceSum(keep_dims=True)
        self.select = P.Select()
        self.shape = P.Shape()
        self.softmax = P.Softmax()
        self.squeeze = P.Squeeze()
        self.squeeze_first_axis = P.Squeeze(0)
        self.squeeze_last_axis = P.Squeeze(-1)
        self.square = P.Square()
        self.transpose = P.Transpose()
        self.is_nan = P.IsNan()

        self.index_type = mstype.int32
        self.nan = np.nan
Example #13
 def __init__(self, config, is_training, use_one_hot_embeddings=False):
     super(GPT2CoQA, self).__init__()
     self.gpt2 = GPT2CoQAModel(config, is_training, use_one_hot_embeddings)
     self.loss = CrossEntropyCalculation(is_training=is_training)
     self.is_training = is_training
     self.num_labels = config.vocab_size
     self.log_softmax = P.LogSoftmax(axis=-1)
     self.label_indices = Tensor(np.arange(1, config.seq_length), mindspore.int32)
Example #14
 def __init__(self, config):
     super(PANGUALPHA_Head, self).__init__()
     if config.word_emb_dp:
         self.matmul = P.MatMul(transpose_b=True).shard(((config.dp, 1), (1, 1)))
     else:
         self.matmul = P.MatMul(transpose_b=True).shard(((config.dp, 1), (config.mp, 1)))
     self.embedding_size = config.embedding_size
     self.log_softmax = P.LogSoftmax(axis=-1)
     self.dtype = config.compute_dtype
     self.cast = P.Cast()
Example #15
 def __init__(self, config, is_training, use_one_hot_embeddings=False):
     super(GPT2Lambada, self).__init__()
     self.gpt2 = GPT2LambadaModel(config, is_training, use_one_hot_embeddings)
     self.num_labels = config.vocab_size
     self.loss = CrossEntropyCalculationWithMask(is_training=is_training, num_labels=self.num_labels, config=config)
     self.is_training = is_training
     self.log_softmax = P.LogSoftmax(axis=-1)
     self.reshape = P.Reshape()
     self.shape = P.Shape()
     self.cast = P.Cast()
Example #16
 def __init__(self, config, is_training, num_labels=2, dropout_prob=0.0, use_one_hot_embeddings=False):
     super(BertSquadModel, self).__init__()
     self.bert = BertModel(config, is_training, use_one_hot_embeddings)
     self.weight_init = TruncatedNormal(config.initializer_range)
     self.dense1 = nn.Dense(config.hidden_size, num_labels, weight_init=self.weight_init,
                            has_bias=True).to_float(config.compute_type)
     self.num_labels = num_labels
     self.dtype = config.dtype
     self.log_softmax = P.LogSoftmax(axis=1)
     self.is_training = is_training
Example #17
 def __init__(self, config, is_training, num_labels=2, dropout_prob=0.0, use_one_hot_embeddings=False):
     super(BertRegressionModel, self).__init__()
     self.bert = BertModel(config, is_training, use_one_hot_embeddings)
     self.cast = P.Cast()
     self.weight_init = TruncatedNormal(config.initializer_range)
     self.log_softmax = P.LogSoftmax(axis=-1)
     self.dtype = config.dtype
     self.num_labels = num_labels
     self.dropout = nn.Dropout(1 - dropout_prob)
     self.dense_1 = nn.Dense(config.hidden_size, 1, weight_init=self.weight_init,
                             has_bias=True).to_float(mstype.float16)
Example #18
 def __init__(self, config, is_training, use_one_hot_embeddings=False):
     super(GPT2CoQAModel, self).__init__()
     self.gpt2 = GPT2Model(config, is_training, use_one_hot_embeddings)
     self.weight_init = TruncatedNormal(config.initializer_range)
     self.dense1 = nn.Dense(config.d_model,
                            config.vocab_size,
                            weight_init=self.weight_init,
                            has_bias=True).to_float(config.compute_type)
     self.log_softmax = P.LogSoftmax(axis=-1)
     self.vocab_size = config.vocab_size
     self.dtype = config.dtype
Example #19
 def __init__(self, config, is_training, use_one_hot_embeddings=False):
     super(GPT2Translation, self).__init__()
     self.gpt2 = GPT2TranslationModel(config, is_training, use_one_hot_embeddings)
     self.num_labels = config.vocab_size
     self.loss = CrossEntropyCalculationWithMask(is_training=is_training, num_labels=self.num_labels, config=config)
     self.is_training = is_training
     self.log_softmax = P.LogSoftmax(axis=-1)
     self.reshape = P.Reshape()
     self.shape = P.Shape()
     self.gather = P.GatherV2()
     self.indices1 = Tensor(np.arange(config.seq_length - 1), mindspore.int32)
     self.indices2 = Tensor(np.arange(1, config.seq_length), mindspore.int32)
Example #20
def test_logsoftmax():
    x = np.array([[-0.08082921, -0.13706027, -0.4711177,  -0.05606057],
                  [-0.46082982,  1.1761844,  -1.016654,   -1.743829  ],
                  [-1.5062045,   0.6910976,   0.4839723,   1.1502692 ]]).astype(np.float32)
    expect = np.array([[-1.2939762,  -1.3502073,  -1.6842647,  -1.2692076 ],
                       [-1.9445671,  -0.3075528,  -2.5003912,  -3.2275662 ],
                       [-3.452001,   -1.2546989,  -1.4618242,  -0.79552734]]).astype(np.float32)

    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    LogSoftmax = P.LogSoftmax()
    output = LogSoftmax(Tensor(x))
    assert np.allclose(output.asnumpy(), expect)
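The expected values in this test follow from the identity log_softmax(x) = x - log(sum(exp(x))) applied along the last axis. A NumPy reference such as the sketch below (not part of the original test) can be used to cross-check `expect` on any backend:

def np_log_softmax(x, axis=-1):
    # Shift by the row max for numerical stability, then apply
    # log_softmax(x) = x - log(sum(exp(x))) along `axis`.
    shifted = x - x.max(axis=axis, keepdims=True)
    return shifted - np.log(np.exp(shifted).sum(axis=axis, keepdims=True))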
Example #21
 def __init__(self, config, is_training, use_one_hot_embeddings=False, num_labels=10):
     super(GPT2CBT, self).__init__()
     self.gpt2 = GPT2CBTModel(config, is_training, use_one_hot_embeddings, num_labels=num_labels)
     self.loss1 = CrossEntropyCalculation(is_training=is_training)
     self.loss2 = CrossEntropyCalculation(is_training=is_training)
     self.mc_num_labels = num_labels
     self.lm_num_labels = config.vocab_size
     self.is_training = is_training
     self.log_softmax = P.LogSoftmax(axis=-1)
     self.shape = P.Shape()
     self.reshape = P.Reshape()
     self.cast = P.Cast()
Example #22
def validate_model(net, dataset):
    accs = []
    for _, data in enumerate(dataset.create_dict_iterator()):
        train_x = Tensor(data['image'].astype(np.float32))
        label = Tensor(data['label'].astype(np.int32))
        output = net(train_x)
        log_output = P.LogSoftmax(axis=1)(output)
        acc = np.mean(log_output.asnumpy().argmax(axis=1) == label.asnumpy())
        accs.append(acc)

    acc_mean = np.mean(accs)
    return acc_mean
Example #23
 def __init__(self, config=None, is_training=None, use_one_hot_embeddings=False):
     super(GPT2Summarization, self).__init__()
     self.gpt2 = GPT2SummarizationModel(config, is_training, use_one_hot_embeddings)
     self.is_training = is_training
     self.last_idx = (-1,)
     self.log_softmax = P.LogSoftmax(axis=-1)
     self.reshape = P.Reshape()
     self.shape = P.Shape()
     self.batch_size = config.batch_size
     self.seq_length = config.seq_length
     self.vocab_size = config.vocab_size
     self.cast = P.Cast()
     self.loss_function = CrossEntropyCalculationWithMask(num_labels=self.vocab_size, is_training=self.is_training, config=config)
Example #24
 def __init__(self, config, is_training, num_labels=2, dropout_prob=0.0, use_one_hot_embeddings=False,
              assessment_method=""):
     super(BertCLSModel, self).__init__()
     if not is_training:
         config.hidden_dropout_prob = 0.0
         config.hidden_probs_dropout_prob = 0.0
     self.bert = BertModel(config, is_training, use_one_hot_embeddings)
     self.cast = P.Cast()
     self.weight_init = TruncatedNormal(config.initializer_range)
     self.log_softmax = P.LogSoftmax(axis=-1)
     self.dtype = config.dtype
     self.num_labels = num_labels
     self.dense_1 = nn.Dense(config.hidden_size, self.num_labels, weight_init=self.weight_init,
                             has_bias=True).to_float(config.compute_type)
     self.dropout = nn.Dropout(1 - dropout_prob)
     self.assessment_method = assessment_method
Example #25
def train_model(train_net, net, dataset):
    accs = []
    loss_sum = 0
    for _, data in enumerate(dataset.create_dict_iterator()):
        train_x = Tensor(data['image'].astype(np.float32))
        label = Tensor(data['label'].astype(np.int32))
        loss = train_net(train_x, label)
        output = net(train_x)
        log_output = P.LogSoftmax(axis=1)(output)
        acc = np.mean(log_output.asnumpy().argmax(axis=1) == label.asnumpy())
        accs.append(acc)
        loss_sum += loss.asnumpy()

    loss_sum = loss_sum / len(accs)
    acc_mean = np.mean(accs)
    return loss_sum, acc_mean
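A typical way to drive these helpers is to wrap the bare network in a loss cell and a one-step training cell, train on one loader, and validate on another. The wiring below is a hedged sketch: the network, optimizer settings, and dataset names are illustrative, not from the source, and `nn` is assumed to be `mindspore.nn`:

# Hypothetical wiring of train_model/validate_model with MindSpore training cells.
loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
optimizer = nn.Momentum(net.trainable_params(), learning_rate=0.01, momentum=0.9)
train_net = nn.TrainOneStepCell(nn.WithLossCell(net, loss_fn), optimizer)
train_net.set_train()

avg_loss, train_acc = train_model(train_net, net, train_dataset)
val_acc = validate_model(net, val_dataset)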
Example #26
 def __init__(self, config, is_training, use_one_hot_embeddings=False):
     """
     Args:
         config: the configuration of the GPT-2 model.
         is_training (bool): `True` for training (fine-tuning), `False` for evaluation.
         use_one_hot_embeddings (bool): whether to use one-hot token embeddings. Default: False.
     """
     super(GPT2LambadaModel, self).__init__()
     if not is_training:
         config.hidden_dropout = 0.0
     self.vocab_size = config.vocab_size
     self.gpt2 = GPT2Model(config, is_training, use_one_hot_embeddings)
     self.cast = P.Cast()
     self.shape = P.Shape()
     self.log_softmax = P.LogSoftmax(axis=-1)
     self.dtype = config.dtype
     self.dense1 = nn.Dense(config.d_model,
                            config.vocab_size,
                            weight_init=TruncatedNormal(
                                config.initializer_range)).to_float(
                                    mstype.float16)
     self.dropout = nn.Dropout(1 - config.hidden_dropout)
Example #27
    def __init__(self, config, is_training, use_one_hot_embeddings=False):
        super(GPT2LanguageModel, self).__init__()
        if not is_training:
            config.hidden_dropout = 0.0

        self.gpt2 = GPT2Model(config, is_training, use_one_hot_embeddings)
        self.vocab_size = config.vocab_size
        self.cast = P.Cast()
        self.shape = P.Shape()
        self.log_softmax = P.LogSoftmax(axis=-1)

        self.dtype = config.dtype
        # self.dense1 = nn.Dense(config.d_model,
        #                        config.vocab_size,
        #                        weight_init=weight_variable([config.d_model, config.vocab_size]),
        #                        has_bias=True).to_float(config.compute_type)
        self.dense1 = nn.Dense(config.d_model,
                               config.vocab_size,
                               weight_init=TruncatedNormal(
                                   config.initializer_range),
                               has_bias=True).to_float(config.compute_type)
        self.dropout = nn.Dropout(1 - config.hidden_dropout)
Example #28
 def __init__(self, strategy1, strategy2, strategy3):
     super().__init__()
     self.matmul1 = P.MatMul().shard(strategy1)
     self.matmul2 = P.MatMul().shard(strategy2)
     self.logsoftmax = P.LogSoftmax().shard(strategy3)
Example #29
 def __init__(self, axis=-1):
     super(LogSoftmax, self).__init__()
     self.log_softmax = P.LogSoftmax(axis)
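A wrapper cell like this normally just forwards to the primitive. A minimal sketch of the rest of the class and of a call site (the `construct` body and the usage line are assumptions, not from the source; `Tensor` and `np` are assumed to be imported as in the earlier examples):

 def construct(self, x):
     return self.log_softmax(x)

# Usage: per-row log-probabilities for a (2, 5) float32 input.
out = LogSoftmax(axis=-1)(Tensor(np.random.randn(2, 5).astype(np.float32)))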
Example #30
     'skip': ['backward']}),
 ('MaxPoolWithArgmax', {
     'block': P.MaxPoolWithArgmax(ksize=2, strides=2),
     'desc_inputs': [[128, 32, 32, 64]],
     'desc_bprop': [[128, 32, 8, 16], [128, 32, 8, 16]]}),
 ('SoftmaxCrossEntropyWithLogits', {
     'block': P.SoftmaxCrossEntropyWithLogits(),
     'desc_inputs': [[1, 10], [1, 10]],
     'desc_bprop': [[1], [1, 10]],
     'skip': ['backward_exec']}),
 ('Flatten', {
     'block': P.Flatten(),
     'desc_inputs': [[128, 32, 32, 64]],
     'desc_bprop': [[128 * 32 * 8 * 16]]}),
 ('LogSoftmax', {
     'block': P.LogSoftmax(),
     'desc_inputs': [[64, 2]],
     'desc_bprop': [[160, 30522]]}),
 ('LogSoftmaxGrad', {
     'block': G.LogSoftmaxGrad(),
     'desc_inputs': [[16, 1234], [16, 1234]],
     'desc_bprop': [[64, 2]],
     'skip': ['backward']}),
 ('LayerNorm', {
     'block': P.LayerNorm(),
     'desc_inputs': [[2, 16], [16], [16]],
     'desc_bprop': [[2, 16], [2, 16], [2, 16]]}),
 ('LayerNormGrad', {
     'block': G.LayerNormGrad(),
     'desc_inputs': [[2, 16], [2, 16], [2, 16], [2, 16], [16]],
     'desc_bprop': [[2, 16], [16], [16]],