Example 1
 def __init__(self, features, class_num=1000):
     super(VGG, self).__init__()
     self.features = features
     self.flatten = layers.Flatten()
     self.classifier = layers.SequentialLayer([
         layers.Dense(512 * 7 * 7, 4096),
         layers.ReLU(),
         layers.Dropout(),
         layers.Dense(4096, 4096),
         layers.ReLU(),
         layers.Dropout(),
         layers.Dense(4096, class_num),
     ])
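The classifier consumes the flattened VGG feature map: 512 channels at 7 x 7 spatial resolution, i.e. 512 * 7 * 7 = 25088 inputs to the first Dense layer. Below is a minimal shape check for the head alone; it assumes the TinyMS-style imports these snippets rely on (tinyms.layers, tinyms.Tensor) and is a sketch, not part of the original model.

import numpy as np
from tinyms import layers, Tensor

# Sketch only: the same Dense/ReLU/Dropout stack as above, with class_num fixed to 1000.
head = layers.SequentialLayer([
    layers.Dense(512 * 7 * 7, 4096),
    layers.ReLU(),
    layers.Dropout(),
    layers.Dense(4096, 1000),
])
x = Tensor(np.zeros((2, 512 * 7 * 7), np.float32))  # two flattened feature maps
print(head(x).shape)  # expected: (2, 1000)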
Example 2
 def __init__(self,
              hidden_size=768,
              seq_length=512,
              num_attention_heads=12,
              intermediate_size=3072,
              attention_probs_dropout_prob=0.02,
              use_one_hot_embeddings=False,
              initializer_range=0.02,
              hidden_dropout_prob=0.1,
              use_relative_positions=False,
              hidden_act="gelu",
              compute_type=ts.float32):
     super(BertEncoderLayer, self).__init__()
     self.attention = BertSelfAttention(
         hidden_size=hidden_size,
         seq_length=seq_length,
         num_attention_heads=num_attention_heads,
         attention_probs_dropout_prob=attention_probs_dropout_prob,
         use_one_hot_embeddings=use_one_hot_embeddings,
         initializer_range=initializer_range,
         hidden_dropout_prob=hidden_dropout_prob,
         use_relative_positions=use_relative_positions,
         compute_type=compute_type)
     self.intermediate = layers.Dense(in_channels=hidden_size,
                                      out_channels=intermediate_size,
                                      activation=hidden_act,
                                      weight_init=TruncatedNormal(initializer_range)).to_float(compute_type)
     self.output = BertOutput(in_channels=intermediate_size,
                              out_channels=hidden_size,
                              initializer_range=initializer_range,
                              dropout_prob=hidden_dropout_prob,
                              compute_type=compute_type)
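The three members form a standard Transformer encoder block: self-attention, a feed-forward expansion to intermediate_size, and BertOutput to project back and re-normalize. The construct method is not part of the snippet; the following is a hedged sketch of the flow these attributes imply, not the original code.

def construct(self, input_tensor, attention_mask):
    attention_output = self.attention(input_tensor, attention_mask)  # self-attention sub-layer
    intermediate_output = self.intermediate(attention_output)        # hidden_size -> intermediate_size with hidden_act
    return self.output(intermediate_output, attention_output)        # back to hidden_size, dropout, residual, LayerNorm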
Example 3
def _fc(in_channel, out_channel):
    weight_shape = (out_channel, in_channel)
    weight = _weight_variable(weight_shape)
    return layers.Dense(in_channel,
                        out_channel,
                        has_bias=True,
                        weight_init=weight,
                        bias_init=0)
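Note that _weight_variable is called with shape (out_channel, in_channel), which matches the layout layers.Dense expects for an explicit weight_init tensor. _weight_variable itself is defined elsewhere; the version below is a hypothetical stand-in, used only to make the sketch self-contained.

import numpy as np
from tinyms import layers, Tensor

def _weight_variable(shape, factor=0.01):
    # hypothetical stand-in: small random weights of the requested shape
    return Tensor(np.random.randn(*shape).astype(np.float32) * factor)

fc = _fc(2048, 1000)  # e.g. map a 2048-d pooled feature vector to 1000 class scores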
Example 4
 def __init__(self,
              input_channel=1280,
              class_num=1000,
              use_activation=False):
     super(MobileNetV2Head, self).__init__()
     # mobilenet head
     self.head = layers.SequentialLayer(
         ([GlobalAvgPooling(),
           layers.Dense(input_channel, class_num)]))
     self.use_activation = use_activation
     self.activation = Softmax()
     self._initialize_weights()
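use_activation is only stored here; the Softmax is created unconditionally and presumably applied in construct when the flag is set. A hedged sketch of that forward pass (not the original code):

def construct(self, x):
    x = self.head(x)            # GlobalAvgPooling -> Dense(input_channel, class_num)
    if self.use_activation:
        x = self.activation(x)  # optional Softmax over the class logits
    return x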
Example 5
 def __init__(self, config, is_training, num_labels=2, dropout_prob=0.0, use_one_hot_embeddings=False):
     super(BertSquadModel, self).__init__()
     if not is_training:
         config.hidden_dropout_prob = 0.0
         config.attention_probs_dropout_prob = 0.0
     self.bert = Bert(config, is_training, use_one_hot_embeddings)
     self.weight_init = TruncatedNormal(config.initializer_range)
     self.dense1 = layers.Dense(config.hidden_size, num_labels, weight_init=self.weight_init,
                                has_bias=True).to_float(config.compute_type)
     self.num_labels = num_labels
     self.dtype = config.dtype
     self.log_softmax = P.LogSoftmax(axis=1)
     self.is_training = is_training
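With num_labels=2 the Dense layer produces one start and one end logit per token, and LogSoftmax(axis=1) then normalizes those scores over the sequence positions, which is the usual SQuAD span-extraction head. A hedged sketch of the implied forward pass (not the original code; it reuses the hidden_size and seq_length attributes that Bert stores, see Example 10):

def construct(self, input_ids, input_mask, token_type_id):
    sequence_output, _, _ = self.bert(input_ids, token_type_id, input_mask)
    # flatten (batch, seq_length, hidden_size) so Dense scores every token
    seq = P.Reshape()(sequence_output, (-1, self.bert.hidden_size))
    logits = self.dense1(seq)                                            # start/end logit per token
    logits = P.Reshape()(logits, (-1, self.bert.seq_length, self.num_labels))
    return self.log_softmax(logits)                                      # axis=1: normalize over positions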
Example 6
    def __init__(self, vocab_size, embed_size, num_hiddens, num_layers,
                 bidirectional, num_classes, weight, batch_size):
        super(SentimentNet, self).__init__()
        # Map words to vectors
        self.embedding = layers.Embedding(vocab_size,
                                          embed_size,
                                          embedding_table=weight)
        self.embedding.embedding_table.requires_grad = False
        self.trans = P.Transpose()
        self.perm = (1, 0, 2)

        if context.get_context("device_target") in STACK_LSTM_DEVICE:
            # stack lstm by user
            self.encoder = StackLSTM(input_size=embed_size,
                                     hidden_size=num_hiddens,
                                     num_layers=num_layers,
                                     has_bias=True,
                                     bidirectional=bidirectional,
                                     dropout=0.0)
            self.h, self.c = stack_lstm_default_state(batch_size, num_hiddens,
                                                      num_layers,
                                                      bidirectional)
        else:
            # standard lstm
            self.encoder = layers.LSTM(input_size=embed_size,
                                       hidden_size=num_hiddens,
                                       num_layers=num_layers,
                                       has_bias=True,
                                       bidirectional=bidirectional,
                                       dropout=0.0)
            self.h, self.c = lstm_default_state(batch_size, num_hiddens,
                                                num_layers, bidirectional)

        self.concat = P.Concat(1)
        if bidirectional:
            self.decoder = layers.Dense(num_hiddens * 4, num_classes)
        else:
            self.decoder = layers.Dense(num_hiddens * 2, num_classes)
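The decoder width follows from two doublings: each LSTM time step emits num_hiddens * 2 features when bidirectional, and the stored P.Concat(1) presumably joins two time-step outputs (typically the first and the last) before classification, doubling the width again. A quick arithmetic check:

num_hiddens, bidirectional = 100, True
per_step = num_hiddens * (2 if bidirectional else 1)  # LSTM output width per time step
decoder_in = 2 * per_step                             # Concat(1) of two time-step outputs
assert decoder_in == (num_hiddens * 4 if bidirectional else num_hiddens * 2)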
Example 7
 def __init__(self,
              in_channels,
              out_channels,
              initializer_range=0.02,
              dropout_prob=0.1,
              compute_type=ts.float32):
     super(BertOutput, self).__init__()
     self.dense = layers.Dense(in_channels, out_channels,
                               weight_init=TruncatedNormal(initializer_range)).to_float(compute_type)
     self.dropout = layers.Dropout(1 - dropout_prob)
     self.dropout_prob = dropout_prob
     self.add = P.Add()
     self.layernorm = layers.LayerNorm((out_channels,)).to_float(compute_type)
     self.cast = P.Cast()
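BertOutput bundles the projection back to the residual width with the usual post-layer processing: Dense, Dropout with keep probability 1 - dropout_prob, residual add, LayerNorm. A hedged sketch of the forward pass these attributes imply (not the original code):

def construct(self, hidden_states, input_tensor):
    output = self.dense(hidden_states)       # project to out_channels in compute_type
    output = self.dropout(output)            # keep_prob = 1 - dropout_prob
    output = self.add(output, input_tensor)  # residual connection
    return self.layernorm(output)            # post-LN over the last dimension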
Example 8
 def __init__(self, config, is_training, num_labels=2, dropout_prob=0.0, use_one_hot_embeddings=False,
              assessment_method=""):
     super(BertCLSModel, self).__init__()
     if not is_training:
         config.hidden_dropout_prob = 0.0
         config.attention_probs_dropout_prob = 0.0
     self.bert = Bert(config, is_training, use_one_hot_embeddings)
     self.cast = P.Cast()
     self.weight_init = TruncatedNormal(config.initializer_range)
     self.log_softmax = P.LogSoftmax(axis=-1)
     self.dtype = config.dtype
     self.num_labels = num_labels
     self.dense_1 = layers.Dense(config.hidden_size, self.num_labels, weight_init=self.weight_init,
                                 has_bias=True).to_float(config.compute_type)
     self.dropout = layers.Dropout(1 - dropout_prob)
     self.assessment_method = assessment_method
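Sentence-level classification presumably runs off the pooled [CLS] representation returned by Bert: cast to the working dtype, dropout, a Dense projection to num_labels, then LogSoftmax (assessment_method is stored so the head can decide whether log-probabilities suit the chosen metric). A hedged sketch (not the original code):

def construct(self, input_ids, input_mask, token_type_id):
    _, pooled_output, _ = self.bert(input_ids, token_type_id, input_mask)
    cls = self.dropout(self.cast(pooled_output, self.dtype))
    logits = self.cast(self.dense_1(cls), self.dtype)
    return self.log_softmax(logits)  # per-class log-probabilities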
Example 9
 def __init__(self, config, is_training, num_labels=11, use_crf=False, dropout_prob=0.0,
              use_one_hot_embeddings=False):
     super(BertNERModel, self).__init__()
     if not is_training:
         config.hidden_dropout_prob = 0.0
         config.attention_probs_dropout_prob = 0.0
     self.bert = Bert(config, is_training, use_one_hot_embeddings)
     self.cast = P.Cast()
     self.weight_init = TruncatedNormal(config.initializer_range)
     self.log_softmax = P.LogSoftmax(axis=-1)
     self.dtype = config.dtype
     self.num_labels = num_labels
     self.dense_1 = layers.Dense(config.hidden_size, self.num_labels, weight_init=self.weight_init,
                                 has_bias=True).to_float(config.compute_type)
     self.dropout = layers.Dropout(1 - dropout_prob)
     self.reshape = P.Reshape()
     self.shape = (-1, config.hidden_size)
     self.use_crf = use_crf
     self.origin_shape = (-1, config.seq_length, self.num_labels)
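Token-level tagging: hidden states are flattened to the stored shape (-1, hidden_size) so the Dense layer scores every token, and origin_shape restores (batch, seq_length, num_labels), which is the layout a CRF layer would consume when use_crf is set. A hedged sketch (not the original code):

def construct(self, input_ids, input_mask, token_type_id):
    sequence_output, _, _ = self.bert(input_ids, token_type_id, input_mask)
    seq = self.reshape(self.dropout(sequence_output), self.shape)  # (batch * seq_length, hidden_size)
    logits = self.cast(self.dense_1(seq), self.dtype)
    if self.use_crf:
        return self.reshape(logits, self.origin_shape)  # (batch, seq_length, num_labels) for the CRF
    return self.log_softmax(logits)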
Example 10
    def __init__(self,
                 config,
                 is_training,
                 use_one_hot_embeddings=False):
        super(Bert, self).__init__()
        config = copy.deepcopy(config)
        if not is_training:
            config.hidden_dropout_prob = 0.0
            config.attention_probs_dropout_prob = 0.0

        self.seq_length = config.seq_length
        self.hidden_size = config.hidden_size
        self.num_hidden_layers = config.num_hidden_layers
        self.embedding_size = config.hidden_size
        self.token_type_ids = None

        self.last_idx = self.num_hidden_layers - 1
        output_embedding_shape = [-1, self.seq_length, self.embedding_size]

        self.bert_embedding_lookup = layers.Embedding(
            vocab_size=config.vocab_size,
            embedding_size=self.embedding_size,
            use_one_hot=use_one_hot_embeddings,
            embedding_table=TruncatedNormal(config.initializer_range))

        self.bert_embedding_postprocessor = EmbeddingPostprocessor(
            embedding_size=self.embedding_size,
            embedding_shape=output_embedding_shape,
            use_relative_positions=config.use_relative_positions,
            use_token_type=True,
            token_type_vocab_size=config.type_vocab_size,
            use_one_hot_embeddings=use_one_hot_embeddings,
            initializer_range=0.02,
            max_position_embeddings=config.max_position_embeddings,
            dropout_prob=config.hidden_dropout_prob)

        self.bert_encoder = BertTransformer(
            hidden_size=self.hidden_size,
            seq_length=self.seq_length,
            num_attention_heads=config.num_attention_heads,
            num_hidden_layers=self.num_hidden_layers,
            intermediate_size=config.intermediate_size,
            attention_probs_dropout_prob=config.attention_probs_dropout_prob,
            use_one_hot_embeddings=use_one_hot_embeddings,
            initializer_range=config.initializer_range,
            hidden_dropout_prob=config.hidden_dropout_prob,
            use_relative_positions=config.use_relative_positions,
            hidden_act=config.hidden_act,
            compute_type=config.compute_type,
            return_all_encoders=True)

        self.cast = P.Cast()
        self.dtype = config.dtype
        self.cast_compute_type = SaturateCast(dst_type=config.compute_type)
        self.slice = P.StridedSlice()

        self.squeeze_1 = P.Squeeze(axis=1)
        self.dense = layers.Dense(self.hidden_size, self.hidden_size,
                                  activation="tanh",
                                  weight_init=TruncatedNormal(config.initializer_range)).to_float(config.compute_type)
        self._create_attention_mask_from_input_mask = CreateAttentionMaskFromInputMask(config)
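The trailing Dense with tanh activation is BERT's pooler: StridedSlice and Squeeze extract the first-token ([CLS]) vector from the last encoder layer, and the tanh projection yields the pooled output consumed by sentence-level heads such as BertCLSModel in Example 8. A hedged outline of that pooling step (the original construct is not shown):

# encoder_outputs = self.bert_encoder(...)                   # one output per layer (return_all_encoders=True)
# last_layer = encoder_outputs[self.last_idx]                # (batch, seq_length, hidden_size)
# first_token = self.squeeze_1(self.slice(last_layer, ...))  # (batch, hidden_size): the [CLS] vector
# pooled_output = self.dense(first_token)                    # tanh-activated pooled representation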
Example 11
    def __init__(self,
                 from_tensor_width,
                 to_tensor_width,
                 from_seq_length,
                 to_seq_length,
                 num_attention_heads=1,
                 size_per_head=512,
                 query_act=None,
                 key_act=None,
                 value_act=None,
                 has_attention_mask=False,
                 attention_probs_dropout_prob=0.0,
                 use_one_hot_embeddings=False,
                 initializer_range=0.02,
                 do_return_2d_tensor=False,
                 use_relative_positions=False,
                 compute_type=ts.float32):

        super(BertAttention, self).__init__()
        self.from_seq_length = from_seq_length
        self.to_seq_length = to_seq_length
        self.num_attention_heads = num_attention_heads
        self.size_per_head = size_per_head
        self.has_attention_mask = has_attention_mask
        self.use_relative_positions = use_relative_positions

        self.scores_mul = 1.0 / math.sqrt(float(self.size_per_head))
        self.reshape = P.Reshape()
        self.shape_from_2d = (-1, from_tensor_width)
        self.shape_to_2d = (-1, to_tensor_width)
        weight = TruncatedNormal(initializer_range)
        units = num_attention_heads * size_per_head
        self.query_layer = layers.Dense(from_tensor_width,
                                        units,
                                        activation=query_act,
                                        weight_init=weight).to_float(compute_type)
        self.key_layer = layers.Dense(to_tensor_width,
                                      units,
                                      activation=key_act,
                                      weight_init=weight).to_float(compute_type)
        self.value_layer = layers.Dense(to_tensor_width,
                                        units,
                                        activation=value_act,
                                        weight_init=weight).to_float(compute_type)

        self.shape_from = (-1, from_seq_length, num_attention_heads, size_per_head)
        self.shape_to = (-1, to_seq_length, num_attention_heads, size_per_head)

        self.matmul_trans_b = P.BatchMatMul(transpose_b=True)
        self.multiply = P.Mul()
        self.transpose = P.Transpose()
        self.trans_shape = (0, 2, 1, 3)
        self.trans_shape_relative = (2, 0, 1, 3)
        self.trans_shape_position = (1, 2, 0, 3)
        self.multiply_data = -10000.0
        self.matmul = P.BatchMatMul()

        self.softmax = layers.Softmax()
        self.dropout = layers.Dropout(1 - attention_probs_dropout_prob)

        if self.has_attention_mask:
            self.expand_dims = P.ExpandDims()
            self.sub = P.Sub()
            self.add = P.Add()
            self.cast = P.Cast()
            self.get_dtype = P.DType()
        if do_return_2d_tensor:
            self.shape_return = (-1, num_attention_heads * size_per_head)
        else:
            self.shape_return = (-1, from_seq_length, num_attention_heads * size_per_head)

        self.cast_compute_type = SaturateCast(dst_type=compute_type)
        if self.use_relative_positions:
            self._generate_relative_positions_embeddings = \
                RelaPosEmbeddingsGenerator(length=to_seq_length,
                                           depth=size_per_head,
                                           max_relative_position=16,
                                           initializer_range=initializer_range,
                                           use_one_hot_embeddings=use_one_hot_embeddings)
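scores_mul and multiply_data encode the two standard attention tricks: dot-product scores are scaled by 1 / sqrt(size_per_head), and masked positions receive an additive -10000.0 so their softmax weight effectively vanishes. A small NumPy illustration of the same arithmetic (a sketch, independent of the class above):

import numpy as np

size_per_head = 64
q = np.random.randn(2, 8, size_per_head)        # (batch, from_seq_length, size_per_head)
k = np.random.randn(2, 8, size_per_head)        # (batch, to_seq_length, size_per_head)
mask = np.ones((2, 8, 8))
mask[:, :, 6:] = 0                               # last two key positions are padding

scores = q @ k.transpose(0, 2, 1) * (1.0 / np.sqrt(size_per_head))  # scores_mul
scores = scores + (1.0 - mask) * -10000.0                           # multiply_data on masked entries
probs = np.exp(scores) / np.exp(scores).sum(-1, keepdims=True)      # softmax over the key axis
print(probs[0, 0, 6:])  # ~0: padded positions receive no attention weight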