# Shared imports assumed by the snippets below; the project-specific helpers
# (Bert, Ebert, MultiHeadAttention, get_activation, get_initializer, and the
# data builders) come from modules not shown in the source.
from collections import OrderedDict

from tensorflow.keras import layers

    # Transformer encoder layer: multi-head self-attention followed by a
    # feed-forward block with an optional low-rank (SVD) bottleneck.
    def __init__(self, config, **kwargs):
        super().__init__(**kwargs)
        self.hidden_size = config.hidden_size
        intermediate_size = config.intermediate_size
        intermediate_act_fn = get_activation(config.intermediate_act_fn)

        kwargs['name'] = 'attention'
        self.attention_layer = MultiHeadAttention(config, **kwargs)

        if config.get('svd_units', 0) > 0:
            self.intermediate_layer0 = layers.Dense(config.svd_units,
                                                    name='dense0')
        else:
            self.intermediate_layer0 = None
        self.intermediate_layer = layers.Dense(intermediate_size,
                                               name='dense',
                                               activation=intermediate_act_fn)
        if config.get('svd_units', 0) > 0:
            self.output_layer0 = layers.Dense(config.svd_units, name='dense0')
        else:
            self.output_layer0 = None

        self.output_layer = layers.Dense(self.hidden_size, name='dense')
        self.output_dropout = layers.Dropout(config.hidden_dropout_prob,
                                             seed=config.random_seed)
        self.output_norm_layer = layers.LayerNormalization(name="layer_norm",
                                                           axis=-1,
                                                           epsilon=1e-12)

        self.attention = None
        self.debug_save_dir = config.debug_save_dir if config.debug else None
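
# A minimal sketch of how this layer's feed-forward block would be applied
# in call(), assuming the standard BERT residual-plus-LayerNorm ordering;
# the helper name `apply_ffn` is hypothetical. When svd_units > 0, each
# Dense is preceded by a low-rank projection (dense0), factorizing the full
# weight matrix into two smaller ones.
def apply_ffn(layer, attention_output, training=False):
    x = attention_output
    if layer.intermediate_layer0 is not None:
        x = layer.intermediate_layer0(x)   # [batch, seq, svd_units]
    x = layer.intermediate_layer(x)        # [batch, seq, intermediate_size]
    if layer.output_layer0 is not None:
        x = layer.output_layer0(x)         # [batch, seq, svd_units]
    x = layer.output_layer(x)              # [batch, seq, hidden_size]
    x = layer.output_dropout(x, training=training)
    # Residual connection around the feed-forward block, then LayerNorm.
    return layer.output_norm_layer(x + attention_output)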

    # Sequence classifier on top of the Bert encoder: a tanh pooler over the
    # [CLS] token, dropout, and a Dense classification head (reduced to one
    # unit per choice for multiple-choice tasks).
    def __init__(self, config, **kwargs):
        super().__init__(**kwargs)
        self.bert_encoder = Bert(config)

        initializer = get_initializer(config)
        self.num_classes = config.num_classes
        self.max_seq_length = config.max_seq_length
        self.pooler = layers.Dense(config.hidden_size,
                                   kernel_initializer=initializer,
                                   name='bert/pooler/dense',
                                   activation='tanh')

        self.cls_dropout_layer = layers.Dropout(config.hidden_dropout_prob)
        self.num_choices = config.get('num_choices', 0)
        num_classes = 1 if self.num_choices else self.num_classes
        self.cls_layer = layers.Dense(num_classes,
                                      kernel_initializer=initializer,
                                      name='classifier/dense')

        self.pooled_output = None
        self.attentions = None
        self.encoded_output = None
        self.embeddings = None
        self.logits = None
        if config.use_replace_map:
            self.replace_map = {
                'LayerNorm': 'layer_norm',
                'bert/': 'bert_' + config.task + '/bert/'
            }
        else:
            self.replace_map = {}
        self.data_builder = BertClassifierDataBuilder(config)
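
# A minimal sketch of this classifier's forward pass, assuming the encoder
# returns token-level states of shape [batch, seq, hidden]; the helper name
# `classify` is hypothetical. Multiple-choice inputs are assumed to arrive
# flattened to [batch * num_choices, ...], so the one-unit logits are
# reshaped back to [batch, num_choices] at the end.
import tensorflow as tf

def classify(model, encoded_output, training=False):
    first_token = encoded_output[:, 0]     # hidden state at the [CLS] position
    pooled = model.pooler(first_token)     # [batch, hidden_size]
    pooled = model.cls_dropout_layer(pooled, training=training)
    logits = model.cls_layer(pooled)
    if model.num_choices:
        logits = tf.reshape(logits, [-1, model.num_choices])
    return logits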

    # Multi-head attention block (evidently the MultiHeadAttention used
    # above): Q/K/V projections, attention dropout, output projection, and
    # residual LayerNorm.
    def __init__(self, config, **kwargs):
        super().__init__(**kwargs)
        hidden_size = config.hidden_size
        self.num_heads = config.num_heads
        self.attn_head_size = config.attention_head_size
        qkv_size = self.attn_head_size * self.num_heads
        initializer = get_initializer(config)
        self.query_layer = layers.Dense(qkv_size,
                                        name="self/query",
                                        kernel_initializer=initializer)
        self.key_layer = layers.Dense(qkv_size,
                                      name="self/key",
                                      kernel_initializer=initializer)
        self.value_layer = layers.Dense(qkv_size,
                                        name="self/value",
                                        kernel_initializer=initializer)
        self.attn_dropout = layers.Dropout(config.attention_dropout_prob)
        self.attn_output_layer = layers.Dense(hidden_size,
                                              name='output/dense',
                                              kernel_initializer=initializer)
        self.attn_output_dropout = layers.Dropout(config.hidden_dropout_prob,
                                                  seed=config.random_seed)
        self.attn_norm_layer = layers.LayerNormalization(
            name="output/layer_norm", axis=-1, epsilon=1e-12)

        self.w_layer = layers.Dense(1, name="self/w")
        self.attention = None
        self.random_seed = config.random_seed
        self.debug = config.debug
        self.debug_save_dir = config.debug_save_dir if config.debug else None
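
# A minimal sketch of the score computation these Q/K/V projections feed,
# using the standard scaled dot-product formulation; the helper name
# `attention_scores` is hypothetical, and the extra `w_layer` above suggests
# a model-specific variant that is not reproduced here.
import math

import tensorflow as tf

def attention_scores(layer, hidden_states, batch_size, seq_length):
    def split_heads(x):
        # [batch, seq, heads * head_size] -> [batch, heads, seq, head_size]
        x = tf.reshape(x, [batch_size, seq_length,
                           layer.num_heads, layer.attn_head_size])
        return tf.transpose(x, [0, 2, 1, 3])

    q = split_heads(layer.query_layer(hidden_states))
    k = split_heads(layer.key_layer(hidden_states))
    scores = tf.matmul(q, k, transpose_b=True)  # [batch, heads, seq, seq]
    return scores / math.sqrt(float(layer.attn_head_size))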

    # Classifier on top of the Ebert encoder, which runs the two text
    # segments through separate lower layers before the shared upper layers
    # (see the lower/upper entries in the replace_map below).
    def __init__(self, config, **kwargs):
        super().__init__(**kwargs)
        self.ebert_encoder = Ebert(config)

        initializer = get_initializer(config)
        self.num_classes = config.num_classes
        self.pooler = layers.Dense(config.hidden_size,
                                   kernel_initializer=initializer,
                                   name='ebert/pooler/dense',
                                   activation='tanh')
        self.cls_dropout_layer = layers.Dropout(config.hidden_dropout_prob)
        self.num_choices = config.get('num_choices', 0)
        num_classes = 1 if self.num_choices else self.num_classes
        self.cls_layer = layers.Dense(num_classes,
                                      kernel_initializer=initializer,
                                      name='classifier/dense')

        # max_first_length is padded by 2, presumably to reserve slots for
        # the [CLS] and [SEP] tokens around the first segment.
        self.max_first_length = config.max_first_length + 2
        self.max_second_length = config.max_seq_length - self.max_first_length

        self.pooled_output = None
        self.encoded_output = None
        self.embeddings = None
        self.logits = None
        self.first_embeddings = None
        self.second_embeddings = None

        task = config.task
        replace_map = OrderedDict({
            'LayerNorm': 'layer_norm',
            'bert/pooler': 'ebert_{}/ebert/pooler'.format(task),
            'bert/embeddings': 'ebert_{}/ebert/embeddings'.format(task)
        })
        # upper layers must be replaced first (i.e., longest match)
        layer_key = 'bert/encoder/layer_{}'
        layer_val = 'ebert_{}/ebert/{}_encoder/layer_{}'
        for layer_idx in range(config.sep_layers, config.num_hidden_layers):
            k = layer_key.format(layer_idx)
            replace_map[k] = layer_val.format(task, 'upper', layer_idx)
        for layer_idx in range(config.sep_layers):
            k = layer_key.format(layer_idx)
            replace_map[k] = layer_val.format(task, 'lower', layer_idx)
        if config.use_replace_map:
            self.replace_map = replace_map
        else:
            self.replace_map = {}
        self.data_builder = EbertClassifierDataBuilder(config)
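
# A minimal sketch of how `replace_map` would be used when loading a stock
# BERT checkpoint, assuming a rename-then-restore scheme; the helper name
# `map_variable_name` is hypothetical. Insertion order matters: because
# 'bert/encoder/layer_10' is inserted before 'bert/encoder/layer_1', the
# two-digit layers are rewritten before the one-digit prefix can clobber
# them, which is what the longest-match comment above refers to.
def map_variable_name(name, replace_map):
    for old, new in replace_map.items():
        if old in name:
            name = name.replace(old, new)
    return name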

    # Question-answering head on top of the Ebert encoder: a two-unit Dense
    # for start/end span prediction, plus an optional answer-type classifier
    # when num_classes is set.
    def __init__(self, config, **kwargs):
        super().__init__(**kwargs)
        self.ebert_encoder = Ebert(config)

        initializer = get_initializer(config)
        num_classes = config.num_classes
        if num_classes:
            self.pooler = layers.Dense(config.hidden_size,
                                       kernel_initializer=initializer,
                                       name='pooler/dense',
                                       activation='tanh')
            self.cls_layer = layers.Dense(config.num_classes,
                                          kernel_initializer=initializer,
                                          name='answer_class/dense')

        self.span_layer = layers.Dense(2,
                                       kernel_initializer=initializer,
                                       name='answer_classifier/dense')
        self.max_first_length = self.ebert_encoder.max_first_length
        self.max_c_length = self.ebert_encoder.max_second_length

        task = config.task
        replace_map = OrderedDict({
            'LayerNorm': 'layer_norm',
            'bert/answer_classifier':
                'ebert_{}/ebert/answer_classifier'.format(task),
            'bert/embeddings': 'ebert_{}/ebert/embeddings'.format(task)
        })
        # upper layers must be replaced first (i.e., longest match)
        layer_key = 'bert/encoder/layer_{}'
        layer_val = 'ebert_{}/ebert/{}_encoder/layer_{}'
        for layer_idx in range(config.sep_layers, config.num_hidden_layers):
            k = layer_key.format(layer_idx)
            replace_map[k] = layer_val.format(task, 'upper', layer_idx)
        for layer_idx in range(config.sep_layers):
            k = layer_key.format(layer_idx)
            replace_map[k] = layer_val.format(task, 'lower', layer_idx)

        self.replace_map = replace_map if config.use_replace_map else {}
        self.encoded_output = None
        self.q_embeddings = None
        self.c_embeddings = None
        self.logits = None
        self.data_builder = EbertQaDataBuilder(config)
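
# A minimal sketch of the span head's forward pass, following the usual
# SQuAD-style start/end decomposition; the helper name `span_logits` is
# hypothetical. The two-unit Dense above yields per-token start and end
# scores, split apart along the last axis.
import tensorflow as tf

def span_logits(model, encoded_output):
    logits = model.span_layer(encoded_output)  # [batch, seq, 2]
    start_logits, end_logits = tf.unstack(logits, axis=-1)
    return start_logits, end_logits            # each [batch, seq]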