Code Example #1
 def __init__(self,
              hidden_size=768,
              seq_length=512,
              num_attention_heads=12,
              intermediate_size=3072,
              attention_probs_dropout_prob=0.02,
              use_one_hot_embeddings=False,
              initializer_range=0.02,
              hidden_dropout_prob=0.1,
              use_relative_positions=False,
              hidden_act="gelu",
              compute_type=ts.float32):
     super(BertEncoderLayer, self).__init__()
     self.attention = BertSelfAttention(
         hidden_size=hidden_size,
         seq_length=seq_length,
         num_attention_heads=num_attention_heads,
         attention_probs_dropout_prob=attention_probs_dropout_prob,
         use_one_hot_embeddings=use_one_hot_embeddings,
         initializer_range=initializer_range,
         hidden_dropout_prob=hidden_dropout_prob,
         use_relative_positions=use_relative_positions,
         compute_type=compute_type)
     self.intermediate = layers.Dense(in_channels=hidden_size,
                                      out_channels=intermediate_size,
                                      activation=hidden_act,
                                      weight_init=TruncatedNormal(initializer_range)).to_float(compute_type)
     self.output = BertOutput(in_channels=intermediate_size,
                              out_channels=hidden_size,
                              initializer_range=initializer_range,
                              dropout_prob=hidden_dropout_prob,
                              compute_type=compute_type)
Code Example #2
File: ssd300.py Project: huxiaoman7/tinyms
def init_net_param(network, initialize_mode='TruncatedNormal'):
    """Init the parameters in net."""
    params = network.trainable_params()
    for p in params:
        if 'beta' not in p.name and 'gamma' not in p.name and 'bias' not in p.name:
            if initialize_mode == 'TruncatedNormal':
                p.set_data(
                    initializer(TruncatedNormal(0.02), p.data.shape,
                                p.data.dtype))
            else:
                p.set_data(initializer(initialize_mode, p.data.shape, p.data.dtype))
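Examples like these all rely on the same pattern: initial weights are drawn from a truncated normal distribution with a small standard deviation (0.02 here) and wrapped in a Parameter. A minimal, self-contained sketch of that pattern, assuming standard MindSpore import paths (the tinyms re-exports used above may differ):

from mindspore import Parameter
from mindspore.common.initializer import initializer, TruncatedNormal

# Sample a [100, 768] weight tensor from a truncated normal distribution
# with standard deviation 0.02, then wrap it as a trainable Parameter.
weight = Parameter(initializer(TruncatedNormal(0.02), [100, 768]),
                   name='example_weight')
print(weight.shape)  # (100, 768)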
Code Example #3
 def __init__(self, config, is_training, num_labels=2, dropout_prob=0.0, use_one_hot_embeddings=False):
     super(BertSquadModel, self).__init__()
     if not is_training:
         config.hidden_dropout_prob = 0.0
         config.attention_probs_dropout_prob = 0.0
     self.bert = Bert(config, is_training, use_one_hot_embeddings)
     self.weight_init = TruncatedNormal(config.initializer_range)
     self.dense1 = layers.Dense(config.hidden_size, num_labels, weight_init=self.weight_init,
                                has_bias=True).to_float(config.compute_type)
     self.num_labels = num_labels
     self.dtype = config.dtype
     self.log_softmax = P.LogSoftmax(axis=1)
     self.is_training = is_training
Code Example #4
    def __init__(self,
                 embedding_size,
                 embedding_shape,
                 use_relative_positions=False,
                 use_token_type=False,
                 token_type_vocab_size=16,
                 use_one_hot_embeddings=False,
                 initializer_range=0.02,
                 max_position_embeddings=512,
                 dropout_prob=0.1):
        super(EmbeddingPostprocessor, self).__init__()
        self.use_token_type = use_token_type
        self.token_type_vocab_size = token_type_vocab_size
        self.use_one_hot_embeddings = use_one_hot_embeddings
        self.max_position_embeddings = max_position_embeddings
        self.embedding_table = Parameter(
            initializer(TruncatedNormal(initializer_range),
                        [token_type_vocab_size, embedding_size]),
            name='embedding_table')

        self.shape_flat = (-1,)
        self.one_hot = layers.OneHot()
        self.on_value = Tensor(1.0, ts.float32)
        self.off_value = Tensor(0.1, ts.float32)
        self.array_mul = P.MatMul()
        self.reshape = P.Reshape()
        self.shape = tuple(embedding_shape)
        self.layernorm = layers.LayerNorm((embedding_size,))
        self.dropout = layers.Dropout(1 - dropout_prob)
        self.gather = P.Gather()
        self.use_relative_positions = use_relative_positions
        self.slice = P.StridedSlice()
        self.full_position_embeddings = Parameter(
            initializer(TruncatedNormal(initializer_range),
                        [max_position_embeddings, embedding_size]),
            name='full_position_embeddings')
Code Example #5
 def __init__(self,
              in_channels,
              out_channels,
              initializer_range=0.02,
              dropout_prob=0.1,
              compute_type=ts.float32):
     super(BertOutput, self).__init__()
     self.dense = layers.Dense(in_channels, out_channels,
                               weight_init=TruncatedNormal(initializer_range)).to_float(compute_type)
     self.dropout = layers.Dropout(1 - dropout_prob)
     self.dropout_prob = dropout_prob
     self.add = P.Add()
     self.layernorm = layers.LayerNorm((out_channels,)).to_float(compute_type)
     self.cast = P.Cast()
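Only the constructor is shown above. A plausible construct method composing these operators (dense projection, dropout, residual add, layer normalization) is sketched below; it is an illustration built from the attributes defined above, not code copied from the project:

 def construct(self, hidden_status, input_tensor):
     # Project back to hidden_size, regularize, add the residual input,
     # and normalize: the standard transformer output block.
     output = self.dense(hidden_status)
     output = self.dropout(output)
     output = self.add(input_tensor, output)
     output = self.layernorm(output)
     return output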
Code Example #6
 def __init__(self, config, is_training, num_labels=2, dropout_prob=0.0, use_one_hot_embeddings=False,
              assessment_method=""):
     super(BertCLSModel, self).__init__()
     if not is_training:
         config.hidden_dropout_prob = 0.0
         config.attention_probs_dropout_prob = 0.0
     self.bert = Bert(config, is_training, use_one_hot_embeddings)
     self.cast = P.Cast()
     self.weight_init = TruncatedNormal(config.initializer_range)
     self.log_softmax = P.LogSoftmax(axis=-1)
     self.dtype = config.dtype
     self.num_labels = num_labels
     self.dense_1 = layers.Dense(config.hidden_size, self.num_labels, weight_init=self.weight_init,
                                 has_bias=True).to_float(config.compute_type)
     self.dropout = layers.Dropout(1 - dropout_prob)
     self.assessment_method = assessment_method
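As with the other wrappers, the forward pass is not shown here. Assuming the Bert backbone returns a pooled [CLS] representation as its second output (an assumption, not confirmed by this snippet), the classification head would look roughly like this sketch:

 def construct(self, input_ids, input_mask, token_type_id):
     # Hypothetical sketch: classify from the pooled [CLS] vector.
     _, pooled_output, _ = self.bert(input_ids, token_type_id, input_mask)
     cls = self.cast(pooled_output, self.dtype)
     cls = self.dropout(cls)
     logits = self.dense_1(cls)
     return self.log_softmax(self.cast(logits, self.dtype))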
Code Example #7
 def __init__(self, config, is_training, num_labels=11, use_crf=False, dropout_prob=0.0,
              use_one_hot_embeddings=False):
     super(BertNERModel, self).__init__()
     if not is_training:
         config.hidden_dropout_prob = 0.0
         config.attention_probs_dropout_prob = 0.0
     self.bert = Bert(config, is_training, use_one_hot_embeddings)
     self.cast = P.Cast()
     self.weight_init = TruncatedNormal(config.initializer_range)
     self.log_softmax = P.LogSoftmax(axis=-1)
     self.dtype = config.dtype
     self.num_labels = num_labels
     self.dense_1 = layers.Dense(config.hidden_size, self.num_labels, weight_init=self.weight_init,
                                 has_bias=True).to_float(config.compute_type)
     self.dropout = layers.Dropout(1 - dropout_prob)
     self.reshape = P.Reshape()
     self.shape = (-1, config.hidden_size)
     self.use_crf = use_crf
     self.origin_shape = (-1, config.seq_length, self.num_labels)
Code Example #8
 def __init__(self,
              vocab_size,
              embedding_size,
              embedding_shape,
              use_one_hot_embeddings=False,
              initializer_range=0.02):
     super(EmbeddingLookup, self).__init__()
     self.vocab_size = vocab_size
     self.use_one_hot_embeddings = use_one_hot_embeddings
     self.embedding_table = Parameter(
         initializer(TruncatedNormal(initializer_range),
                     [vocab_size, embedding_size]))
     self.expand = P.ExpandDims()
     self.shape_flat = (-1,)
     self.gather = P.Gather()
     self.one_hot = P.OneHot()
     self.on_value = Tensor(1.0, ts.float32)
     self.off_value = Tensor(0.0, ts.float32)
     self.array_mul = P.MatMul()
     self.reshape = P.Reshape()
     self.shape = tuple(embedding_shape)
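Only the table construction is shown here; the lookup itself would either gather rows from the table or, when use_one_hot_embeddings is set, one-hot encode the ids and multiply by the table. A rough sketch using the operators defined above (an illustration, not the project's own construct):

 def construct(self, input_ids):
     # Flatten the ids, look up their embeddings, then restore the
     # configured output shape.
     flat_ids = self.reshape(input_ids, self.shape_flat)
     if self.use_one_hot_embeddings:
         one_hot_ids = self.one_hot(flat_ids, self.vocab_size, self.on_value, self.off_value)
         output = self.array_mul(one_hot_ids, self.embedding_table)
     else:
         output = self.gather(self.embedding_table, flat_ids, 0)
     return self.reshape(output, self.shape)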
Code Example #9
    def __init__(self,
                 length,
                 depth,
                 max_relative_position,
                 initializer_range,
                 use_one_hot_embeddings=False):
        super(RelaPosEmbeddingsGenerator, self).__init__()
        self.depth = depth
        self.vocab_size = max_relative_position * 2 + 1
        self.use_one_hot_embeddings = use_one_hot_embeddings

        self.embeddings_table = Parameter(
            initializer(TruncatedNormal(initializer_range),
                        [self.vocab_size, self.depth]))

        self.relative_positions_matrix = RelaPosMatrixGenerator(length=length,
                                                                max_relative_position=max_relative_position)
        self.reshape = P.Reshape()
        self.one_hot = layers.OneHot(depth=self.vocab_size)
        self.shape = P.Shape()
        self.gather = P.Gather()  # index_select
        self.matmul = P.BatchMatMul()
Code Example #10
    def __init__(self,
                 config,
                 is_training,
                 use_one_hot_embeddings=False):
        super(Bert, self).__init__()
        config = copy.deepcopy(config)
        if not is_training:
            config.hidden_dropout_prob = 0.0
            config.attention_probs_dropout_prob = 0.0

        self.seq_length = config.seq_length
        self.hidden_size = config.hidden_size
        self.num_hidden_layers = config.num_hidden_layers
        self.embedding_size = config.hidden_size
        self.token_type_ids = None

        self.last_idx = self.num_hidden_layers - 1
        output_embedding_shape = [-1, self.seq_length, self.embedding_size]

        self.bert_embedding_lookup = layers.Embedding(
            vocab_size=config.vocab_size,
            embedding_size=self.embedding_size,
            use_one_hot=use_one_hot_embeddings,
            embedding_table=TruncatedNormal(config.initializer_range))

        self.bert_embedding_postprocessor = EmbeddingPostprocessor(
            embedding_size=self.embedding_size,
            embedding_shape=output_embedding_shape,
            use_relative_positions=config.use_relative_positions,
            use_token_type=True,
            token_type_vocab_size=config.type_vocab_size,
            use_one_hot_embeddings=use_one_hot_embeddings,
            initializer_range=0.02,
            max_position_embeddings=config.max_position_embeddings,
            dropout_prob=config.hidden_dropout_prob)

        self.bert_encoder = BertTransformer(
            hidden_size=self.hidden_size,
            seq_length=self.seq_length,
            num_attention_heads=config.num_attention_heads,
            num_hidden_layers=self.num_hidden_layers,
            intermediate_size=config.intermediate_size,
            attention_probs_dropout_prob=config.attention_probs_dropout_prob,
            use_one_hot_embeddings=use_one_hot_embeddings,
            initializer_range=config.initializer_range,
            hidden_dropout_prob=config.hidden_dropout_prob,
            use_relative_positions=config.use_relative_positions,
            hidden_act=config.hidden_act,
            compute_type=config.compute_type,
            return_all_encoders=True)

        self.cast = P.Cast()
        self.dtype = config.dtype
        self.cast_compute_type = SaturateCast(dst_type=config.compute_type)
        self.slice = P.StridedSlice()

        self.squeeze_1 = P.Squeeze(axis=1)
        self.dense = layers.Dense(self.hidden_size, self.hidden_size,
                                  activation="tanh",
                                  weight_init=TruncatedNormal(config.initializer_range)).to_float(config.compute_type)
        self._create_attention_mask_from_input_mask = CreateAttentionMaskFromInputMask(config)
Code Example #11
    def __init__(self,
                 from_tensor_width,
                 to_tensor_width,
                 from_seq_length,
                 to_seq_length,
                 num_attention_heads=1,
                 size_per_head=512,
                 query_act=None,
                 key_act=None,
                 value_act=None,
                 has_attention_mask=False,
                 attention_probs_dropout_prob=0.0,
                 use_one_hot_embeddings=False,
                 initializer_range=0.02,
                 do_return_2d_tensor=False,
                 use_relative_positions=False,
                 compute_type=ts.float32):

        super(BertAttention, self).__init__()
        self.from_seq_length = from_seq_length
        self.to_seq_length = to_seq_length
        self.num_attention_heads = num_attention_heads
        self.size_per_head = size_per_head
        self.has_attention_mask = has_attention_mask
        self.use_relative_positions = use_relative_positions

        self.scores_mul = 1.0 / math.sqrt(float(self.size_per_head))
        self.reshape = P.Reshape()
        self.shape_from_2d = (-1, from_tensor_width)
        self.shape_to_2d = (-1, to_tensor_width)
        weight = TruncatedNormal(initializer_range)
        units = num_attention_heads * size_per_head
        self.query_layer = layers.Dense(from_tensor_width,
                                        units,
                                        activation=query_act,
                                        weight_init=weight).to_float(compute_type)
        self.key_layer = layers.Dense(to_tensor_width,
                                      units,
                                      activation=key_act,
                                      weight_init=weight).to_float(compute_type)
        self.value_layer = layers.Dense(to_tensor_width,
                                        units,
                                        activation=value_act,
                                        weight_init=weight).to_float(compute_type)

        self.shape_from = (-1, from_seq_length, num_attention_heads, size_per_head)
        self.shape_to = (-1, to_seq_length, num_attention_heads, size_per_head)

        self.matmul_trans_b = P.BatchMatMul(transpose_b=True)
        self.multiply = P.Mul()
        self.transpose = P.Transpose()
        self.trans_shape = (0, 2, 1, 3)
        self.trans_shape_relative = (2, 0, 1, 3)
        self.trans_shape_position = (1, 2, 0, 3)
        self.multiply_data = -10000.0
        self.matmul = P.BatchMatMul()

        self.softmax = layers.Softmax()
        self.dropout = layers.Dropout(1 - attention_probs_dropout_prob)

        if self.has_attention_mask:
            self.expand_dims = P.ExpandDims()
            self.sub = P.Sub()
            self.add = P.Add()
            self.cast = P.Cast()
            self.get_dtype = P.DType()
        if do_return_2d_tensor:
            self.shape_return = (-1, num_attention_heads * size_per_head)
        else:
            self.shape_return = (-1, from_seq_length, num_attention_heads * size_per_head)

        self.cast_compute_type = SaturateCast(dst_type=compute_type)
        if self.use_relative_positions:
            self._generate_relative_positions_embeddings = \
                RelaPosEmbeddingsGenerator(length=to_seq_length,
                                           depth=size_per_head,
                                           max_relative_position=16,
                                           initializer_range=initializer_range,
                                           use_one_hot_embeddings=use_one_hot_embeddings)