def __init__(self, params, input_ids, target_ids=None, training=None):
  """Constructor for TransformerModel.

  Builds the embedding layer, encoder, and decoder, runs the encode and
  decode-and-predict steps eagerly, and stores the results on `self`.

  Args:
    params: `BigBirdConfig` dictionary.
    input_ids: int32 Tensor of shape [batch_size, seq_length], encoder
      input token ids.
    target_ids: (optional) int32 Tensor of decoder target token ids.
    training: Boolean indicating whether the call is training or inference.
  """
  self.params = copy.deepcopy(params)
  self.scope = params["scope"]
  with tf.compat.v1.variable_scope(
      self.scope, reuse=tf.compat.v1.AUTO_REUSE) as vs:
    # Shared embedding layer; no token-type embeddings for this
    # seq2seq model (use_token_type=False).
    self.embeder = utils.EmbeddingLayer(
        vocab_size=self.params["vocab_size"],
        emb_dim=self.params["hidden_size"],
        initializer=utils.create_initializer(
            self.params["initializer_range"]),
        scale_emb=self.params["rescale_embedding"],
        use_token_type=False,
        num_token_types=None,
        use_position_embeddings=True,
        max_position_embeddings=self.params["max_position_embeddings"],
        dropout_prob=self.params["hidden_dropout_prob"])
    # Run the inputs through the encoder layer to map the symbol
    # representations to continuous representations.
    self.encoder = encoder.EncoderStack(self.params)
    self.encoder_output, encoder_mask = self._encode(input_ids, training)
    # Decode conditioned on the encoder output and its mask.
    self.decoder = decoder.DecoderStack(self.params)
    self.predictions = self._decode_and_predict(target_ids,
                                                self.encoder_output,
                                                encoder_mask, training)
  super(TransformerModel, self).__init__(name=self.scope, _scope=vs)
def __init__(self, params):
  """Constructor for BertModel.

  Args:
    params: `BigBirdConfig` dictionary.
  """
  self.params = copy.deepcopy(params)
  self.scope = params["scope"]
  hidden_size = self.params["hidden_size"]
  with tf.compat.v1.variable_scope(
      self.scope, reuse=tf.compat.v1.AUTO_REUSE) as vs:
    # Embedding lookup: token + token-type + position embeddings.
    self.embeder = utils.EmbeddingLayer(
        vocab_size=self.params["vocab_size"],
        emb_dim=hidden_size,
        initializer=utils.create_initializer(
            self.params["initializer_range"]),
        scale_emb=self.params["rescale_embedding"],
        use_token_type=True,
        num_token_types=self.params["type_vocab_size"],
        use_position_embeddings=True,
        max_position_embeddings=self.params["max_position_embeddings"],
        dropout_prob=self.params["hidden_dropout_prob"])
    # Stack of transformer encoder layers.
    self.encoder = encoder.EncoderStack(self.params)
    # Dense + tanh projection ("pooler") from hidden_size to hidden_size.
    self.pooler = tf.compat.v1.layers.Dense(
        units=hidden_size,
        activation=tf.tanh,
        kernel_initializer=utils.create_initializer(
            self.params["initializer_range"]),
        name="pooler/dense")
  super(BertModel, self).__init__(name=self.scope, _scope=vs)
def __init__(self, params):
  """Constructor for TransformerModel.

  Args:
    params: `BigBirdConfig` dictionary.
  """
  self.params = copy.deepcopy(params)
  self.scope = params["scope"]
  with tf.compat.v1.variable_scope(
      self.scope, reuse=tf.compat.v1.AUTO_REUSE) as vs:
    # Shared embedding layer for the seq2seq model; token-type
    # embeddings are disabled (use_token_type=False).
    self.embeder = utils.EmbeddingLayer(
        vocab_size=self.params["vocab_size"],
        emb_dim=self.params["hidden_size"],
        initializer=utils.create_initializer(
            self.params["initializer_range"]),
        scale_emb=self.params["rescale_embedding"],
        use_token_type=False,
        num_token_types=None,
        use_position_embeddings=True,
        max_position_embeddings=self.params["max_position_embeddings"],
        dropout_prob=self.params["hidden_dropout_prob"])
    # Encoder and decoder stacks are constructed here; the actual
    # encode/decode computation happens elsewhere.
    self.encoder = encoder.EncoderStack(self.params)
    self.decoder = decoder.DecoderStack(self.params)
  super(TransformerModel, self).__init__(name=self.scope, _scope=vs)
def __init__(self, params):
  """Constructor for BertModel.

  Validates and normalizes `params`: short sequences fall back to full
  attention, and for sparse attention the maximum encoder length is
  rounded up to the next multiple of the block size, with `self.pad`
  set to pad inputs to that expanded length.

  Args:
    params: `BigBirdConfig` dictionary.
  """
  self.params = copy.deepcopy(params)
  self.scope = params["scope"]
  super(BertModel, self).__init__(name=self.scope)

  # validate params
  # Default: no padding needed. Named function instead of a lambda
  # assignment (PEP 8 E731).
  def _no_pad(x):
    return x

  self.pad = _no_pad
  if params["max_encoder_length"] <= 512:
    # Sparse attention brings no benefit on short sequences.
    logging.info("Switching to full attention for short sequences")
    self.params["attention_type"] = "original_full"
  if self.params["attention_type"] in ("simulated_sparse", "block_sparse"):
    if params["max_encoder_length"] % params["block_size"]:
      logging.info(
          "Expand max_encoder_length to next multiple of block_size")
      self.params["max_encoder_length"] = (
          params["max_encoder_length"] // params["block_size"] + 1
      ) * params["block_size"]
      # Pad on the right of the sequence axis up to the expanded length.
      pad_size = self.params["max_encoder_length"] - params[
          "max_encoder_length"]
      paddings = [[0, 0], [0, pad_size]]

      # Bind `paddings` eagerly via a default argument instead of a
      # lambda closure (PEP 8 E731; also rules out late binding).
      def _pad_to_block(x, paddings=paddings):
        return tf.pad(x, paddings)

      self.pad = _pad_to_block

  with tf.compat.v1.variable_scope(
      self.scope, reuse=tf.compat.v1.AUTO_REUSE):
    # Embedding lookup: token + token-type + position embeddings.
    self.embeder = utils.EmbeddingLayer(
        vocab_size=self.params["vocab_size"],
        emb_dim=self.params["hidden_size"],
        initializer=utils.create_initializer(
            self.params["initializer_range"]),
        scale_emb=self.params["rescale_embedding"],
        use_token_type=True,
        num_token_types=self.params["type_vocab_size"],
        use_position_embeddings=True,
        max_position_embeddings=self.params["max_position_embeddings"],
        dropout_prob=self.params["hidden_dropout_prob"])
    # Transformer encoder stack.
    self.encoder = encoder.EncoderStack(self.params)
    # Dense + tanh projection ("pooler") from hidden_size to hidden_size.
    self.pooler = utils.SimpleDenseLayer(
        input_size=self.params["hidden_size"],
        output_size=self.params["hidden_size"],
        initializer=utils.create_initializer(
            self.params["initializer_range"]),
        activation=tf.tanh,
        name="pooler/dense")
def __init__(self, params, input_ids, token_type_ids=None, training=None):
  """Constructor for BertModel.

  Args:
    params: `BigBirdConfig` dictionary.
    input_ids: int32 Tensor of shape [batch_size, seq_length].
    token_type_ids: (optional) int32 Tensor of shape
      [batch_size, seq_length].
    training: Boolean indicating whether the call is training or inference.
  """
  self.params = copy.deepcopy(params)
  self.scope = params["scope"]
  with tf.compat.v1.variable_scope(
      self.scope, reuse=tf.compat.v1.AUTO_REUSE) as vs:
    # If no token-type ids were given, use all zeros: every token is
    # treated as belonging to segment 0.
    if token_type_ids is None:
      token_type_ids = tf.zeros_like(input_ids, dtype=tf.int32)
    # Derive the attention mask from input_ids: 1 for real tokens
    # (id > 0), 0 for padding.
    input_mask = tf.where(input_ids > 0,
                          tf.ones_like(input_ids),
                          tf.zeros_like(input_ids))
    # 1) Embedding.
    # 1-1) Define the embedding layer (token + token-type + position,
    # with dropout).
    self.embeder = utils.EmbeddingLayer(
        vocab_size=self.params["vocab_size"],
        emb_dim=self.params["hidden_size"],
        initializer=utils.create_initializer(
            self.params["initializer_range"]),
        scale_emb=self.params["rescale_embedding"],
        use_token_type=True,
        num_token_types=self.params["type_vocab_size"],
        use_position_embeddings=True,
        max_position_embeddings=self.params["max_position_embeddings"],
        dropout_prob=self.params["hidden_dropout_prob"])
    # 1-2) Apply it to the inputs.
    embedding_output = self.embeder.operation(
        input_ids,
        self.params["max_encoder_length"],
        token_type_ids=token_type_ids,
        training=training)
    # 2) Encoder.
    # 2-1) Define the encoder stack.
    self.encoder = encoder.EncoderStack(self.params)
    # 2-2) Run the encoder (sparse attention) over the embeddings.
    self.sequence_output = self.encoder.operation(embedding_output,
                                                  input_mask, training)
    # 3) Pooling.
    # The "pooler" converts the encoded sequence tensor of shape
    # [batch_size, seq_length, hidden_size] to a tensor of shape
    # [batch_size, hidden_size]. This is necessary for segment-level
    # (or segment-pair-level) classification tasks where we need a fixed
    # dimensional representation of the segment.
    # We "pool" the model by simply taking the hidden state corresponding
    # to the first ([CLS]) token. We assume that this has been pre-trained.
    first_token_tensor = self.sequence_output[:, 0, :]
    self.pooler = tf.compat.v1.layers.Dense(
        units=self.params["hidden_size"],
        activation=tf.tanh,
        kernel_initializer=utils.create_initializer(
            self.params["initializer_range"]),
        name="pooler/dense")
    self.pooled_output = self.pooler(first_token_tensor)
  super(BertModel, self).__init__(name=self.scope, _scope=vs)