def __init__(self, config, **kwargs):
    """Transformer encoder sub-layer: multi-head attention followed by a
    feed-forward block with optional low-rank (SVD) bottleneck projections.

    Args:
        config: model configuration providing sizes, dropout rates, the
            activation name, and debug settings.
        **kwargs: forwarded to the base layer; ``name`` is overwritten for
            the nested attention layer.
    """
    super().__init__(**kwargs)
    self.hidden_size = config.hidden_size
    ffn_units = config.intermediate_size
    act_fn = get_activation(config.intermediate_act_fn)
    # Give the nested attention layer an explicit name within this scope.
    kwargs['name'] = 'attention'
    self.attention_layer = MultiHeadAttention(config, **kwargs)
    svd_units = config.get('svd_units', 0)
    # When svd_units > 0, insert a narrow projection before each dense layer.
    self.intermediate_layer0 = (
        layers.Dense(svd_units, name='dense0') if svd_units > 0 else None)
    self.intermediate_layer = layers.Dense(
        ffn_units, name='dense', activation=act_fn)
    self.output_layer0 = (
        layers.Dense(svd_units, name='dense0') if svd_units > 0 else None)
    self.output_layer = layers.Dense(self.hidden_size, name='dense')
    self.output_dropout = layers.Dropout(
        config.hidden_dropout_prob, seed=config.random_seed)
    self.output_norm_layer = layers.LayerNormalization(
        name="layer_norm", axis=-1, epsilon=1e-12)
    # Populated during the forward pass (attention weights for inspection).
    self.attention = None
    self.debug_save_dir = config.debug_save_dir if config.debug else None
def __init__(self, config, **kwargs):
    """BERT sequence classifier: encoder, tanh pooler, and a dense head.

    Args:
        config: model configuration (sizes, dropout, task name, classifier
            settings).
        **kwargs: forwarded to the base layer.
    """
    super().__init__(**kwargs)
    self.bert_encoder = Bert(config)
    init = get_initializer(config)
    self.num_classes = config.num_classes
    self.max_seq_length = config.max_seq_length
    self.pooler = layers.Dense(
        config.hidden_size, kernel_initializer=init,
        name='bert/pooler/dense', activation='tanh')
    self.cls_dropout_layer = layers.Dropout(config.hidden_dropout_prob)
    self.num_choices = config.get('num_choices', 0)
    # Multiple-choice tasks score each choice with a single logit.
    head_units = 1 if self.num_choices else self.num_classes
    self.cls_layer = layers.Dense(
        head_units, kernel_initializer=init, name='classifier/dense')
    # Forward-pass outputs captured for later inspection.
    self.pooled_output = None
    self.attentions = None
    self.encoded_output = None
    self.embeddings = None
    self.logits = None
    # Maps checkpoint variable names onto this model's scoped names.
    if config.use_replace_map:
        self.replace_map = {
            'LayerNorm': 'layer_norm',
            'bert/': 'bert_' + config.task + '/bert/',
        }
    else:
        self.replace_map = {}
    self.data_builder = BertClassifierDataBuilder(config)
def __init__(self, config, **kwargs):
    """Multi-head self-attention block with output projection and layer norm.

    Args:
        config: model configuration providing head counts/sizes, dropout
            rates, the random seed, and debug settings.
        **kwargs: forwarded to the base layer.
    """
    super().__init__(**kwargs)
    hidden_size = config.hidden_size
    self.num_heads = config.num_heads
    self.attn_head_size = config.attention_head_size
    # Combined projection width across all attention heads.
    qkv_units = self.num_heads * self.attn_head_size
    init = get_initializer(config)
    self.query_layer = layers.Dense(
        qkv_units, name="self/query", kernel_initializer=init)
    self.key_layer = layers.Dense(
        qkv_units, name="self/key", kernel_initializer=init)
    self.value_layer = layers.Dense(
        qkv_units, name="self/value", kernel_initializer=init)
    # NOTE(review): this dropout is unseeded while attn_output_dropout uses
    # config.random_seed — confirm the asymmetry is intended.
    self.attn_dropout = layers.Dropout(config.attention_dropout_prob)
    self.attn_output_layer = layers.Dense(
        hidden_size, name='output/dense', kernel_initializer=init)
    self.attn_output_dropout = layers.Dropout(
        config.hidden_dropout_prob, seed=config.random_seed)
    self.attn_norm_layer = layers.LayerNormalization(
        name="output/layer_norm", axis=-1, epsilon=1e-12)
    self.w_layer = layers.Dense(1, name="self/w")
    # Populated during the forward pass (attention weights for inspection).
    self.attention = None
    self.random_seed = config.random_seed
    self.debug = config.debug
    self.debug_save_dir = config.debug_save_dir if config.debug else None
def __init__(self, config, **kwargs):
    """Ebert (split lower/upper encoder) sequence classifier.

    Builds the encoder, pooler, and classification head, and constructs the
    checkpoint-variable rename map used to load original BERT weights into
    this model's variable scopes.

    Fix: the original assigned ``self.num_choices`` twice with the same
    value; the dead duplicate is removed.

    Args:
        config: model configuration (sizes, dropout, task name, segment
            lengths, layer split point).
        **kwargs: forwarded to the base layer.
    """
    super().__init__(**kwargs)
    self.ebert_encoder = Ebert(config)
    initializer = get_initializer(config)
    self.num_classes = config.num_classes
    self.pooler = layers.Dense(
        config.hidden_size, kernel_initializer=initializer,
        name='ebert/pooler/dense', activation='tanh')
    self.cls_dropout_layer = layers.Dropout(config.hidden_dropout_prob)
    self.num_choices = config.get('num_choices', 0)
    # Multiple-choice tasks score each choice with a single logit.
    num_classes = 1 if self.num_choices else self.num_classes
    self.cls_layer = layers.Dense(
        num_classes, kernel_initializer=initializer, name='classifier/dense')
    # First segment reserves two extra positions (presumably the special
    # [CLS]/[SEP] tokens — TODO confirm against the data builder).
    self.max_first_length = config.max_first_length + 2
    self.max_second_length = config.max_seq_length - self.max_first_length
    # Forward-pass outputs captured for later inspection.
    self.pooled_output = None
    self.encoded_output = None
    self.embeddings = None
    self.logits = None
    self.first_embeddings = None
    self.second_embeddings = None
    task = config.task
    replace_map = OrderedDict({
        'LayerNorm': 'layer_norm',
        'bert/pooler': 'ebert_{}/ebert/pooler'.format(task),
        'bert/embeddings': 'ebert_{}/ebert/embeddings'.format(task)
    })
    # Upper layers must be replaced first (i.e., longest match).
    layer_key = 'bert/encoder/layer_{}'
    layer_val = 'ebert_{}/ebert/{}_encoder/layer_{}'
    for layer_idx in range(config.sep_layers, config.num_hidden_layers):
        k = layer_key.format(layer_idx)
        replace_map[k] = layer_val.format(task, 'upper', layer_idx)
    for layer_idx in range(config.sep_layers):
        k = layer_key.format(layer_idx)
        replace_map[k] = layer_val.format(task, 'lower', layer_idx)
    self.replace_map = replace_map if config.use_replace_map else {}
    self.data_builder = EbertClassifierDataBuilder(config)
def __init__(self, config, **kwargs):
    """Ebert question-answering model: span prediction head plus an
    optional answer-type classification head.

    Args:
        config: model configuration (sizes, task name, layer split point,
            ``num_classes`` enabling the answer-type head).
        **kwargs: forwarded to the base layer.
    """
    super().__init__(**kwargs)
    self.ebert_encoder = Ebert(config)
    init = get_initializer(config)
    num_classes = config.num_classes
    # NOTE(review): pooler/cls_layer only exist when num_classes is truthy;
    # downstream code is expected to guard accordingly.
    if num_classes:
        self.pooler = layers.Dense(
            config.hidden_size, kernel_initializer=init,
            name='pooler/dense', activation='tanh')
        self.cls_layer = layers.Dense(
            config.num_classes, kernel_initializer=init,
            name='answer_class/dense')
    # Two outputs per position: span start and span end logits.
    self.span_layer = layers.Dense(
        2, kernel_initializer=init, name='answer_classifier/dense')
    self.max_first_length = self.ebert_encoder.max_first_length
    self.max_c_length = self.ebert_encoder.max_second_length
    task = config.task
    # Checkpoint rename map; upper layers must be replaced first
    # (i.e., longest match).
    replace_map = OrderedDict({
        'LayerNorm': 'layer_norm',
        'bert/answer_classifier': 'ebert_{}/ebert/answer_classifier'.format(task),
        'bert/embeddings': 'ebert_{}/ebert/embeddings'.format(task)
    })
    src_fmt = 'bert/encoder/layer_{}'
    dst_fmt = 'ebert_{}/ebert/{}_encoder/layer_{}'
    for idx in range(config.sep_layers, config.num_hidden_layers):
        replace_map[src_fmt.format(idx)] = dst_fmt.format(task, 'upper', idx)
    for idx in range(config.sep_layers):
        replace_map[src_fmt.format(idx)] = dst_fmt.format(task, 'lower', idx)
    if config.use_replace_map:
        self.replace_map = replace_map
    else:
        self.replace_map = {}
    # Forward-pass outputs captured for later inspection.
    self.encoded_output = None
    self.q_embeddings = None
    self.c_embeddings = None
    self.logits = None
    self.data_builder = EbertQaDataBuilder(config)