class SharedTransformer(tf.keras.layers.Layer):
    def __init__(self, config, use_one_hot_embeddings, **kwargs):
        # Disable Keras autocasting so mixed-precision policies leave the
        # inputs untouched.
        kwargs['autocast'] = False
        super(SharedTransformer, self).__init__(**kwargs)
        self.all_layer_outputs = []
        self.last_key_layer = None
        self.config = config
        self.initializer = base.create_initializer(config.initializer_range)
        self.attention_mask = None
        self.use_one_hot_embeddings = use_one_hot_embeddings
        # A single ForwardLayer is built once and reused for every
        # transformer layer, so all layers share one set of weights.
        with tf.compat.v1.variable_scope("layer"):
            self.layer = ForwardLayer(self.config, self.initializer)

    def call(self, input_ids, input_mask, segment_ids):
        with tf.compat.v1.variable_scope("embeddings"):
            self.embedding_layer = Embedding2()
            input_tensor = self.embedding_layer.apply(
                input_ids, segment_ids, self.config.initializer_range,
                self.config.vocab_size, self.config.embedding_size,
                self.config.type_vocab_size,
                self.config.max_position_embeddings,
                self.config.hidden_dropout_prob,
                self.use_one_hot_embeddings)
            # Project the factorized embedding up to the hidden size.
            input_tensor = self.embedding_projection(input_tensor)
            self.embedding_output = input_tensor
        input_shape = bc.get_shape_list2(input_tensor)
        batch_size, seq_length, _ = input_shape
        with tf.compat.v1.variable_scope("encoder"):
            self.attention_mask = bc.create_attention_mask_from_input_mask2(
                input_tensor, input_mask)
            prev_output = bc.reshape_to_matrix(input_tensor)
            with tf.compat.v1.variable_scope("layer"):
                intermediate_output, prev_output = self.layer.apply(
                    prev_output, batch_size, seq_length, self.attention_mask)
                final_output = bc.reshape_from_matrix2(prev_output,
                                                       input_shape)
                self.all_layer_outputs.append(final_output)
            # Every remaining layer re-enters the same scope and reuses the
            # variables created above.
            for layer_idx in range(1, self.config.num_hidden_layers):
                with tf.compat.v1.variable_scope("layer", reuse=True):
                    intermediate_output, prev_output = self.layer.apply(
                        prev_output, batch_size, seq_length,
                        self.attention_mask)
                    final_output = bc.reshape_from_matrix2(
                        prev_output, input_shape)
                    self.all_layer_outputs.append(final_output)
        return prev_output

    def embedding_projection(self, input_tensor):
        # AUTO_REUSE creates the projection variables on the first call and
        # reuses them afterwards; plain reuse=True would fail on the first
        # entry because no variable exists yet.
        with tf.compat.v1.variable_scope("embedding_projection",
                                         reuse=tf.compat.v1.AUTO_REUSE):
            return bc.dense(self.config.hidden_size,
                            self.initializer)(input_tensor)
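# SharedTransformer applies the single ForwardLayer built in __init__
# num_hidden_layers times, i.e. ALBERT-style cross-layer parameter sharing:
# depth grows but parameter count stays at one layer. A minimal usage
# sketch; the config object and every value below are illustrative
# assumptions, not values from this repository.
from types import SimpleNamespace

_demo_config = SimpleNamespace(
    initializer_range=0.02, vocab_size=30522, embedding_size=128,
    hidden_size=768, type_vocab_size=2, max_position_embeddings=512,
    hidden_dropout_prob=0.1, num_hidden_layers=12)

def _shared_transformer_sketch():
    model = SharedTransformer(_demo_config, use_one_hot_embeddings=False)
    input_ids = tf.zeros([2, 128], dtype=tf.int32)    # [batch, seq_len]
    input_mask = tf.ones([2, 128], dtype=tf.int32)
    segment_ids = tf.zeros([2, 128], dtype=tf.int32)
    # Returns the matrix-shaped output [batch * seq_len, hidden_size];
    # the per-layer 3D outputs accumulate in model.all_layer_outputs.
    return model(input_ids, input_mask, segment_ids)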
def __init__(self, config, n_layers, use_one_hot_embeddings):
    super(TopicVectorBert, self).__init__()
    self.n_layers = n_layers
    self.all_layer_outputs = []
    self.last_key_layer = None
    self.config = config
    self.embedding = None
    self.layer_list = []
    self.initializer = base.create_initializer(config.initializer_range)
    self.attention_mask = None
    self.use_one_hot_embeddings = use_one_hot_embeddings
    # One independent ForwardLayer per transformer layer.
    for layer_idx in range(self.n_layers):
        layer = ForwardLayer(self.config, self.initializer)
        self.layer_list.append(layer)
    self.n_topics = config.n_topics
    self.use_topic_all_layer = config.use_topic_all_layer
    self.hidden_size = config.hidden_size
    # Each topic is stored as a flat vector spanning topic_emb_len
    # hidden-size slots.
    topic_emb_len = 4
    self.topic_embedding_size = self.hidden_size * topic_emb_len
    self.topic_emb_len = topic_emb_len
    self.topic_embedding = tf.Variable(
        lambda: self.initializer(
            shape=(self.n_topics, self.topic_embedding_size),
            dtype=tf.float32),
        name="topic_embedding")
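# How the topic table is consumed is not shown in this excerpt. A plausible
# sketch, with the lookup and reshape below being assumptions rather than
# repository code: turn a topic id into topic_emb_len pseudo-token vectors
# that can then be combined with the word embeddings.
def _topic_lookup_sketch(self, topic_ids):
    # topic_ids: [batch] int32; flat: [batch, topic_emb_len * hidden_size].
    flat = tf.nn.embedding_lookup(self.topic_embedding, topic_ids)
    # [batch, topic_emb_len, hidden_size], so each topic acts like
    # topic_emb_len extra tokens, e.g. prepended to the sequence.
    return tf.reshape(flat, [-1, self.topic_emb_len, self.hidden_size])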
def __init__(self, old_config: JsonConfig):
    self.n_layers = 3
    self.all_layer_outputs = []
    self.last_key_layer = None
    self.old_config = old_config
    # Derive a wider inner config for the middle stack.
    self.inner_config = self.build_config(old_config,
                                          old_config.mid_expanding_factor)
    self.embedding = None
    self.layer_list = []
    self.initializer = base.create_initializer(
        self.inner_config.initializer_range)
    self.token_type_table = tf.compat.v1.get_variable(
        name="token_type_embeddings",
        shape=[self.inner_config.type_vocab_size,
               self.inner_config.hidden_size],
        initializer=self.initializer)
    self.full_position_embeddings = tf.compat.v1.get_variable(
        name="position_embeddings",
        shape=[self.inner_config.max_position_embeddings,
               self.inner_config.hidden_size],
        initializer=self.initializer)
    with tf.compat.v1.variable_scope("mid"):
        for layer_idx in range(self.n_layers):
            with tf.compat.v1.variable_scope("layer_%d" % layer_idx):
                layer = ForwardLayer(self.inner_config, self.initializer)
                self.layer_list.append(layer)
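# build_config is not part of this excerpt. Given the name
# mid_expanding_factor, one plausible reading is that the inner config is a
# copy of old_config with the middle stack widened by that factor. The
# sketch below makes that assumption explicit; it is not the repository's
# actual method, and the scaled fields are guesses.
import copy

def _build_config_sketch(old_config, factor):
    inner = copy.deepcopy(old_config)
    inner.hidden_size = old_config.hidden_size * factor
    inner.intermediate_size = old_config.intermediate_size * factor
    return inner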
def __init__(
        self,
        config,
        use_one_hot_embeddings,
        is_training,
        masked_input_ids,
        input_mask,
        segment_ids,
        tt_input_ids,
        tt_input_mask,
        tt_segment_ids,
):
    # Run the masked-LM batch and the tt batch through one shared BertModel
    # by concatenating them along the batch axis.
    all_input_ids = tf.concat([masked_input_ids, tt_input_ids], axis=0)
    all_input_mask = tf.concat([input_mask, tt_input_mask], axis=0)
    all_segment_ids = tf.concat([segment_ids, tt_segment_ids], axis=0)
    self.config = config
    self.lm_batch_size, _ = get_shape_list2(masked_input_ids)
    self.model = BertModel(config, is_training, all_input_ids,
                           all_input_mask, all_segment_ids,
                           use_one_hot_embeddings)
    initializer = base.create_initializer(config.initializer_range)
    self.tt_layer = ForwardLayer(config, initializer)
    self.tt_input_mask = tt_input_mask
    # Slice off the tt half of the batch and give it one extra layer.
    seq_output = self.model.get_sequence_output()[self.lm_batch_size:]
    tt_batch_size, seq_length = get_shape_list2(tt_input_ids)
    tt_attention_mask = create_attention_mask_from_input_mask2(
        seq_output, self.tt_input_mask)
    print('tt_attention_mask', tt_attention_mask.shape)
    print("seq_output", seq_output.shape)
    seq_output = self.tt_layer.apply_3d(seq_output, tt_batch_size,
                                        seq_length, tt_attention_mask)
    self.tt_feature = mimic_pooling(seq_output, self.config.hidden_size,
                                    self.config.initializer_range)
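# The batch bookkeeping above is the subtle part: both inputs share one
# BertModel forward pass, then only the tt half receives the extra
# ForwardLayer and mimic_pooling. A shape walkthrough with illustrative
# sizes (all numbers below are assumptions for the sketch):
def _tt_slice_sketch():
    lm_batch, tt_batch, seq_len, hidden = 8, 4, 128, 768
    sequence_output = tf.zeros([lm_batch + tt_batch, seq_len, hidden])
    tt_half = sequence_output[lm_batch:]   # [4, 128, 768] -> extra layer
    lm_half = sequence_output[:lm_batch]   # [8, 128, 768] -> masked-LM loss
    return lm_half, tt_half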
def __init__(self, config, n_layers, **kwargs):
    kwargs['autocast'] = False
    super(UpperTransformer, self).__init__(**kwargs)
    self.n_layers = n_layers
    self.all_layer_outputs = []
    self.last_key_layer = None
    self.config = config
    self.embedding = None
    self.layer_list = []
    self.initializer = base.create_initializer(config.initializer_range)
    self.layer_idx_base = 0
    # Each layer gets its own variable scope and its own weights.
    for layer_idx in range(self.n_layers):
        with tf.compat.v1.variable_scope("layer_%d" % layer_idx):
            layer = ForwardLayer(self.config, self.initializer)
            self.layer_list.append(layer)
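# Unlike SharedTransformer above, UpperTransformer builds n_layers
# independent ForwardLayer instances, so parameter count grows linearly
# with depth. A small hypothetical sketch (config as in the earlier
# illustrative example):
def _upper_transformer_sketch(config):
    upper = UpperTransformer(config, n_layers=3)
    # Three distinct layers, each created under its own "layer_%d" scope.
    assert len(upper.layer_list) == 3
    return upper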