def __init__(self, hparams: Hparams, **kwargs):
    super(ErnieEmbedding, self).__init__(**kwargs)
    self.vocab_size = hparams.vocab_size
    self.hidden_size = hparams.hidden_size
    self.initializer_range = hparams.initializer_range
    self.use_task_id = hparams.use_task_id
    self.position_embeddings = tf.keras.layers.Embedding(
        hparams.max_position_embeddings,
        hparams.hidden_size,
        embeddings_initializer=get_initializer(self.initializer_range),
        name="position_embeddings")
    self.token_type_embeddings = tf.keras.layers.Embedding(
        hparams.get("type_vocab_size", hparams.get("sent_type_vocab_size")),
        hparams.hidden_size,
        embeddings_initializer=get_initializer(self.initializer_range),
        name="token_type_embeddings")
    if self.use_task_id:
        self.task_embeddings = tf.keras.layers.Embedding(
            hparams.task_type_vocab_size,
            hparams.hidden_size,
            embeddings_initializer=get_initializer(self.initializer_range),
            name="task_type_embeddings")
    self.layer_norm = tf.keras.layers.LayerNormalization(
        epsilon=hparams.layer_norm_eps, name="LayerNorm")
    self.dropout = tf.keras.layers.Dropout(hparams.hidden_dropout_prob)
def __init__(self, hparams: Hparams, **kwargs):
    super(BertForRoleNer, self).__init__(hparams, **kwargs)
    pretrained_hparams = hparams.pretrained
    model_hparams = hparams.model_attributes
    self.num_labels = hparams.dataset.outputs[0].num
    self.initializer_range = model_hparams.initializer_range
    self.bert = BaseLayer.by_name(pretrained_hparams.norm_name)(pretrained_hparams)
    self.dropout = tf.keras.layers.Dropout(model_hparams.hidden_dropout_prob)
    # self.bilstm = Bilstm(model_hparams.hidden_size, model_hparams.hidden_dropout_prob, name="bilstm")
    self.project = tf.keras.layers.Dense(
        model_hparams.hidden_size,
        kernel_initializer=get_initializer(model_hparams.initializer_range),
        name="project")
    self.ner_output = tf.keras.layers.Dense(
        self.num_labels,
        kernel_initializer=get_initializer(model_hparams.initializer_range),
        name='ner_output')
    self.crf = CRFLayer(self.num_labels, self.initializer_range, name="crf_output")
def __init__(self, hparams: Hparams, **kwargs):
    super(BertDgcnnForNer, self).__init__(hparams, **kwargs)
    pretrained_hparams = hparams.pretrained
    model_hparams = hparams.model_attributes
    self.num_labels = hparams.dataset.outputs[0].num
    self.pos_num = hparams.dataset.inputs[-1].num
    self.initializer_range = model_hparams.initializer_range
    self.pos_embeddings = tf.keras.layers.Embedding(
        self.pos_num,
        32,
        embeddings_initializer=get_initializer(model_hparams.initializer_range),
        name="pos_embedding")
    self.bert = BaseLayer.by_name(pretrained_hparams.norm_name)(pretrained_hparams)
    self.dropout = tf.keras.layers.Dropout(model_hparams.hidden_dropout_prob)
    self.project = tf.keras.layers.Dense(
        model_hparams.hidden_size,
        kernel_initializer=get_initializer(model_hparams.initializer_range),
        name="project")
    self.fusion_project = tf.keras.layers.Dense(
        model_hparams.hidden_size,
        kernel_initializer=get_initializer(model_hparams.initializer_range),
        name="fusion_project")
    self.dgcnn_encoder = DgcnnBlock(
        model_hparams.hidden_size, [3, 3, 3], [1, 2, 4], name="trigger_dgcnn_encoder")
    self.ner_output = tf.keras.layers.Dense(
        self.num_labels,
        kernel_initializer=get_initializer(model_hparams.initializer_range),
        name='ner_output')
    self.crf = CRFLayer(
        self.num_labels, self.initializer_range,
        label_mask=hparams.label_mask, name="crf_output")
def __init__(self, hparams: Hparams, **kwargs):
    super(MultiHeadAttention, self).__init__(**kwargs)
    if hparams.hidden_size % hparams.num_attention_heads != 0:
        raise ValueError(
            f"The hidden size {hparams.hidden_size} is not a multiple of the number of attention "
            f"heads {hparams.num_attention_heads}")
    self.output_attentions = hparams.output_attentions
    self.use_relative_position = False
    if "use_relative_position" in hparams:
        self.use_relative_position = hparams.use_relative_position
    self.num_attention_heads = hparams.num_attention_heads
    self.attention_head_size = hparams.hidden_size // hparams.num_attention_heads
    self.all_head_size = self.num_attention_heads * self.attention_head_size
    self.query = tf.keras.layers.Dense(
        self.all_head_size,
        kernel_initializer=get_initializer(hparams.initializer_range),
        name="query")
    self.key = tf.keras.layers.Dense(
        self.all_head_size,
        kernel_initializer=get_initializer(hparams.initializer_range),
        name="key")
    self.value = tf.keras.layers.Dense(
        self.all_head_size,
        kernel_initializer=get_initializer(hparams.initializer_range),
        name="value")
    self.dropout = tf.keras.layers.Dropout(hparams.attention_probs_dropout_prob)
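# --- Illustrative sketch (not in the original source): the head-split that
# conventionally accompanies the query/key/value projections above in
# BERT-style attention. The method name and batch_size argument are assumptions.
def transpose_for_scores(self, x, batch_size):
    # [batch, seq_len, all_head_size] -> [batch, num_heads, seq_len, head_size]
    x = tf.reshape(x, (batch_size, -1, self.num_attention_heads, self.attention_head_size))
    return tf.transpose(x, perm=[0, 2, 1, 3])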
def __init__(self, hparams: Hparams, **kwargs):
    super(AlbertEmbedding, self).__init__(**kwargs)
    self.max_position_embeddings = hparams.max_position_embeddings
    self.embedding_size = hparams.embedding_size
    self.initializer_range = hparams.initializer_range
    self.layer_norm_eps = hparams.layer_norm_eps
    self.vocab_size = hparams.vocab_size
    self.type_vocab_size = hparams.type_vocab_size
    self.hidden_dropout_prob = hparams.hidden_dropout_prob
    self.position_embeddings = tf.keras.layers.Embedding(
        self.max_position_embeddings,
        self.embedding_size,
        embeddings_initializer=get_initializer(self.initializer_range),
        name='position_embeddings')
    self.token_type_embeddings = tf.keras.layers.Embedding(
        self.type_vocab_size,
        self.embedding_size,
        embeddings_initializer=get_initializer(self.initializer_range),
        name='token_type_embeddings')
    # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
    # any TensorFlow checkpoint file
    self.LayerNorm = tf.keras.layers.LayerNormalization(
        epsilon=self.layer_norm_eps, name='LayerNorm')
    self.dropout = tf.keras.layers.Dropout(self.hidden_dropout_prob)
def __init__(self, filters, window=3, dilation=1, stddev=0.1, initializer_range=0.02, **kwargs):
    super(Dgcnn, self).__init__(**kwargs)
    self.conv1 = tf.keras.layers.Conv1D(
        filters, window,
        dilation_rate=dilation,
        padding='SAME',
        kernel_initializer=get_initializer(initializer_range),
        bias_initializer=get_bias_initializer('conv'))
    self.conv2 = tf.keras.layers.Conv1D(
        filters, window,
        dilation_rate=dilation,
        padding='SAME',
        kernel_initializer=get_initializer(initializer_range),
        bias_initializer=get_bias_initializer('conv'))
    self.noise = tf.keras.layers.GaussianNoise(stddev)
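# --- Hypothetical call() sketch for the layer above, assuming the usual
# dilated gated convolution (DGCNN) formulation: conv1 carries content and
# sigmoid(conv2) gates it. Where GaussianNoise is applied is also an assumption.
def call(self, inputs, training=False):
    gate = tf.sigmoid(self.noise(self.conv2(inputs), training=training))
    return self.conv1(inputs) * gate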
def build(self, input_shape): """Build shared word embedding layer """ with tf.name_scope("word_embeddings"): # Create and initialize weights. The random normal initializer was chosen # arbitrarily, and works well. self.word_embeddings = self.add_weight( "weight", shape=[self.vocab_size, self.embedding_size], initializer=get_initializer(self.initializer_range)) self.embedding_hidden_mapping_in = self.add_weight( "embedding_hidden_mapping_in", shape=[self.embedding_size, self.hidden_size], initializer=get_initializer(self.initializer_range) ) super(AlbertEmbeddingV2, self).build(input_shape)
def __init__(self, hparams: Hparams, **kwargs):
    super(XLNetFeedForward, self).__init__(**kwargs)
    self.layer_norm = tf.keras.layers.LayerNormalization(
        epsilon=hparams.layer_norm_eps, name='layer_norm')
    self.layer_1 = tf.keras.layers.Dense(
        hparams.d_inner,
        kernel_initializer=get_initializer(hparams.initializer_range),
        name='layer_1')
    self.layer_2 = tf.keras.layers.Dense(
        hparams.d_model,
        kernel_initializer=get_initializer(hparams.initializer_range),
        name='layer_2')
    self.dropout = tf.keras.layers.Dropout(hparams.dropout)
    self.activation_function = ACT2FN[hparams.ff_activation]
def __init__(self, config: Hparams, **kwargs):
    super().__init__(config, **kwargs)
    config = config.config
    self.config = config
    self.output_attentions = config.output_attentions
    self.output_hidden_states = config.output_hidden_states
    self.use_cache = config.use_cache
    self.return_dict = config.use_return_dict
    self.num_hidden_layers = config.n_layer
    self.vocab_size = config.vocab_size
    self.n_embd = config.n_embd
    self.wte = SharedEmbeddings(
        config.vocab_size,
        config.hidden_size,
        initializer_range=config.initializer_range,
        name="wte")
    self.wpe = tf.keras.layers.Embedding(
        config.n_positions,
        config.n_embd,
        embeddings_initializer=get_initializer(config.initializer_range),
        name="wpe")
    self.drop = tf.keras.layers.Dropout(config.embd_pdrop)
    if "layers" in config:
        # Build only the configured slice of transformer blocks.
        self.h = [TFBlock(config.n_ctx, config, scale=True, name="h_._{}".format(i))
                  for i in range(config.layers.start, config.layers.end, config.layers.step)]
        self.num_hidden_layers = len(self.h)
    else:
        self.h = [TFBlock(config.n_ctx, config, scale=True, name="h_._{}".format(i))
                  for i in range(config.n_layer)]
    self.ln_f = tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_epsilon, name="ln_f")
    self.pooler = SequenceSummary(config, name="pooler")
def __init__(self, hparams: Hparams, **kwargs):
    super(BertPooler, self).__init__(**kwargs)
    self.dense = tf.keras.layers.Dense(
        hparams.hidden_size,
        kernel_initializer=get_initializer(hparams.initializer_range),
        activation='tanh',
        name='dense')
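# --- Illustrative sketch (assumption): BERT's pooler conventionally applies
# the tanh Dense above to the hidden state of the first ([CLS]) token.
def call(self, hidden_states):
    return self.dense(hidden_states[:, 0])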
def __init__(self, config: Hparams, **kwargs):
    super(SequenceSummary, self).__init__(**kwargs)
    initializer_range = config.initializer_range
    self.summary_type = config.summary_type if 'summary_type' in config else 'last'
    if self.summary_type == 'attn':
        # We should use a standard multi-head attention module with absolute positional embedding for that.
        # Cf. https://github.com/zihangdai/xlnet/blob/master/modeling.py#L253-L276
        # We can probably just use the multi-head attention module of PyTorch >=1.1.0
        raise NotImplementedError
    self.has_summary = 'summary_use_proj' in config and config.summary_use_proj
    if self.has_summary:
        if 'summary_proj_to_labels' in config and config.summary_proj_to_labels and config.num_labels > 0:
            num_classes = config.num_labels
        else:
            num_classes = config.hidden_size
        self.summary = tf.keras.layers.Dense(
            num_classes,
            kernel_initializer=get_initializer(initializer_range),
            name='summary')
    self.has_activation = 'summary_activation' in config and config.summary_activation == 'tanh'
    if self.has_activation:
        self.activation = tf.keras.activations.tanh
    self.has_first_dropout = 'summary_first_dropout' in config and config.summary_first_dropout > 0
    if self.has_first_dropout:
        self.first_dropout = tf.keras.layers.Dropout(config.summary_first_dropout)
    self.has_last_dropout = 'summary_last_dropout' in config and config.summary_last_dropout > 0
    if self.has_last_dropout:
        self.last_dropout = tf.keras.layers.Dropout(config.summary_last_dropout)
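# --- Illustrative sketch (assumption): how the summary_type configured above
# is commonly applied to hidden_states of shape [batch, seq_len, hidden_size].
# The helper name is made up; 'cls_index' and 'attn' handling is omitted.
def _summarize(self, hidden_states):
    if self.summary_type == 'last':
        return hidden_states[:, -1]
    if self.summary_type == 'first':
        return hidden_states[:, 0]
    if self.summary_type == 'mean':
        return tf.reduce_mean(hidden_states, axis=1)
    raise NotImplementedError(self.summary_type)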
def __init__(self, hparams: Hparams, **kwargs):
    super(TransformerIntermediate, self).__init__(**kwargs)
    self.dense = tf.keras.layers.Dense(
        hparams.intermediate_size,
        kernel_initializer=get_initializer(hparams.initializer_range),
        name='dense')
    self.intermediate_act_fn = ACT2FN[hparams.hidden_act]
def __init__(self, hparams: Hparams, **kwargs):
    super(BertPredicationTaskTransform, self).__init__(**kwargs)
    self.dense = tf.keras.layers.Dense(
        hparams.hidden_size,
        kernel_initializer=get_initializer(hparams.initializer_range),
        name='dense')
    self.transform_act_fn = ACT2FN[hparams.hidden_act]
    self.layer_norm = tf.keras.layers.LayerNormalization(
        epsilon=hparams.layer_norm_eps, name="LayerNorm")
def build(self, input_shape): """Build shared word embedding layer Shared weights logic adapted from https://github.com/tensorflow/models/blob/a009f4fb9d2fc4949e32192a944688925ef78659/official/transformer/v2/embedding_layer.py#L24 """ self.weight = self.add_weight( "weight", shape=[self.vocab_size, self.hidden_size], initializer=get_initializer(self.initializer_range)) super(SharedEmbeddings, self).build(input_shape)
def __init__(self, hparams: Hparams, **kwargs):
    super(ElectraEmbeddings, self).__init__(hparams, **kwargs)
    self.vocab_size = hparams.vocab_size
    self.hidden_size = hparams.embedding_size
    self.initializer_range = hparams.initializer_range
    self.position_embeddings = tf.keras.layers.Embedding(
        hparams.max_position_embeddings,
        self.hidden_size,
        embeddings_initializer=get_initializer(self.initializer_range),
        name="position_embeddings")
    self.token_type_embeddings = tf.keras.layers.Embedding(
        hparams.type_vocab_size,
        self.hidden_size,
        embeddings_initializer=get_initializer(self.initializer_range),
        name="token_type_embeddings")
    self.layer_norm = tf.keras.layers.LayerNormalization(
        epsilon=hparams.layer_norm_eps, name="LayerNorm")
    self.dropout = tf.keras.layers.Dropout(hparams.hidden_dropout_prob)
def __init__(self, filter, window, initializer_range=0.02, **kwargs):
    super(Textcnn, self).__init__(**kwargs)
    self.conv = tf.keras.layers.Conv1D(
        filter, window,
        padding='SAME',
        kernel_initializer=get_initializer(initializer_range),
        bias_initializer=get_bias_initializer('conv'))
    self.batch_norm = tf.keras.layers.BatchNormalization()
    self.act_fn = ACT2FN['relu']
    self.max_pool = tf.keras.layers.GlobalMaxPool1D()
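# --- Illustrative sketch (assumption): the conventional conv -> batch-norm ->
# ReLU -> global-max-pool pipeline over the layers built above.
def call(self, inputs, training=False):
    x = self.conv(inputs)
    x = self.batch_norm(x, training=training)
    x = self.act_fn(x)
    return self.max_pool(x)  # [batch, filter]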
def build(self, input_shape):
    self.transition_params = self.add_weight(
        "transition_params",
        shape=[self.num_labels, self.num_labels],
        initializer=get_initializer(self.initializer_range))
    if self.label_mask is not None:
        # Push masked (disallowed) transitions to a large negative score so
        # Viterbi decoding effectively never selects them.
        label_mask = tf.cast(self.label_mask, tf.float32)
        label_mask = (1.0 - label_mask) * -10000.0
        self.transition_params += label_mask
    super(CRFLayer, self).build(input_shape)
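# --- Illustrative usage sketch (assumption): with the masked transition
# matrix built above, Viterbi decoding typically goes through
# tensorflow_addons; the -10000.0 penalty makes disallowed label-to-label
# moves effectively unreachable. The decode() helper name is made up.
import tensorflow_addons as tfa

def decode(self, potentials, sequence_lengths):
    # potentials: [batch, seq_len, num_labels] unary scores from the tagger head
    tags, _ = tfa.text.crf_decode(potentials, self.transition_params, sequence_lengths)
    return tags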
def __init__(self, hparams: Hparams, **kwargs):
    super(BertForSeqClassification, self).__init__(hparams, **kwargs)
    self.num_labels = hparams.dataset.outputs[0].num
    pretrained_hparams = hparams.pretrained
    model_hparams = hparams.model_attributes
    # self.bert = Bert(pretrained_hparams, name='bert')
    assert pretrained_hparams.norm_name in ['bert', 'albert', 'albert_brightmart', "ernie", "xlnet", "electra"], \
        f"{pretrained_hparams.norm_name} is not supported."
    self.encoder = BaseLayer.by_name(pretrained_hparams.norm_name)(pretrained_hparams)
    self.dropout = tf.keras.layers.Dropout(model_hparams.hidden_dropout_prob)
    self.project = tf.keras.layers.Dense(
        model_hparams.hidden_size,
        kernel_initializer=get_initializer(model_hparams.initializer_range),
        name="project")
    self.classifier = tf.keras.layers.Dense(
        self.num_labels,
        kernel_initializer=get_initializer(model_hparams.initializer_range),
        name="classifier")
def __init__(self, hparams: Hparams, **kwargs):
    super(FeedForwardAddAndNorm, self).__init__(**kwargs)
    self.dense = tf.keras.layers.Dense(
        hparams.hidden_size,
        kernel_initializer=get_initializer(hparams.initializer_range),
        name="dense")
    self.layer_norm = tf.keras.layers.LayerNormalization(
        epsilon=hparams.layer_norm_eps, name="LayerNorm")
    self.dropout = tf.keras.layers.Dropout(hparams.hidden_dropout_prob)
def __init__(self, hparams: Hparams, **kwargs):
    super(BertForQA, self).__init__(hparams, **kwargs)
    pretrained_hparams = hparams.pretrained
    model_hparams = hparams.model_attributes
    self.start_n_top = model_hparams.start_n_top
    self.seq_len = hparams.dataset.tokenizer.max_len
    assert pretrained_hparams.norm_name not in ["xlnet_chinese"], \
        f"{pretrained_hparams.norm_name} is not supported."
    self.encode_pretrained = BaseLayer.by_name(pretrained_hparams.norm_name)(pretrained_hparams)
    # Note: start_n_top is passed for both the start and end top-k arguments.
    self.qa_layer = BaseLayer.by_name(model_hparams.qa_layer_name)(
        model_hparams.hidden_size,
        self.seq_len,
        self.start_n_top,
        self.start_n_top,
        get_initializer(model_hparams.initializer_range),
        model_hparams.hidden_dropout_prob)
def build(self, input_shape):
    initializer = get_initializer(self.initializer_range)
    self.q = self.add_weight(shape=(self.d_model, self.n_head, self.d_head),
                             initializer=initializer, trainable=True, name='q')
    self.k = self.add_weight(shape=(self.d_model, self.n_head, self.d_head),
                             initializer=initializer, trainable=True, name='k')
    self.v = self.add_weight(shape=(self.d_model, self.n_head, self.d_head),
                             initializer=initializer, trainable=True, name='v')
    self.o = self.add_weight(shape=(self.d_model, self.n_head, self.d_head),
                             initializer=initializer, trainable=True, name='o')
    self.r = self.add_weight(shape=(self.d_model, self.n_head, self.d_head),
                             initializer=initializer, trainable=True, name='r')
    self.r_r_bias = self.add_weight(shape=(self.n_head, self.d_head),
                                    initializer='zeros', trainable=True, name='r_r_bias')
    self.r_s_bias = self.add_weight(shape=(self.n_head, self.d_head),
                                    initializer='zeros', trainable=True, name='r_s_bias')
    self.r_w_bias = self.add_weight(shape=(self.n_head, self.d_head),
                                    initializer='zeros', trainable=True, name='r_w_bias')
    self.seg_embed = self.add_weight(shape=(2, self.n_head, self.d_head),
                                     initializer=initializer, trainable=True, name='seg_embed')
    super(XLNetRelativeAttention, self).build(input_shape)
def __init__(self, hparams: Hparams, **kwargs):
    super(BertForRelationExtract, self).__init__(hparams, **kwargs)
    pretrained_hparams = hparams.pretrained
    model_hparams = hparams.model_attributes
    self.hidden_size = model_hparams.hidden_size
    self.num_labels = hparams.dataset.outputs[0].num
    self.initializer_range = model_hparams.initializer_range
    self.bert = BaseLayer.by_name(pretrained_hparams.norm_name)(pretrained_hparams)
    self.dropout = tf.keras.layers.Dropout(model_hparams.hidden_dropout_prob)
    # Five parallel projection heads over the encoder output.
    self.project1 = tf.keras.layers.Dense(
        model_hparams.hidden_size,
        kernel_initializer=get_initializer(model_hparams.initializer_range),
        name="project1")
    self.project2 = tf.keras.layers.Dense(
        model_hparams.hidden_size,
        kernel_initializer=get_initializer(model_hparams.initializer_range),
        name="project2")
    self.project3 = tf.keras.layers.Dense(
        model_hparams.hidden_size,
        kernel_initializer=get_initializer(model_hparams.initializer_range),
        name="project3")
    self.project4 = tf.keras.layers.Dense(
        model_hparams.hidden_size,
        kernel_initializer=get_initializer(model_hparams.initializer_range),
        name="project4")
    self.project5 = tf.keras.layers.Dense(
        model_hparams.hidden_size,
        kernel_initializer=get_initializer(model_hparams.initializer_range),
        name="project5")
    self.e1_attention = MultiHeadAttention(model_hparams, name="entity1_attention_fusion")
    self.e2_attention = MultiHeadAttention(model_hparams, name="entity2_attention_fusion")
    self.attention = MultiHeadAttention(model_hparams, name="attention_fusion")
    self.classifier = tf.keras.layers.Dense(
        self.num_labels,
        kernel_initializer=get_initializer(model_hparams.initializer_range),
        name="classifier")
def build(self, input_shape):
    self.weight = self.add_weight(
        "weight",
        shape=[self.nx, self.nf],
        initializer=get_initializer(self.initializer_range))
    self.bias = self.add_weight(
        "bias",
        shape=[1, self.nf],
        initializer=tf.zeros_initializer())
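# --- Illustrative sketch (assumption): GPT-2's "Conv1D" is just an affine
# map, so the weight/bias built above are usually applied by flattening to
# 2-D, multiplying, and restoring the leading dimensions with nf last.
def call(self, x):
    bz, sl = tf.shape(x)[0], tf.shape(x)[1]
    x = tf.reshape(x, [-1, self.nx])
    x = tf.matmul(x, self.weight) + self.bias
    return tf.reshape(x, [bz, sl, self.nf])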
def build(self, input_shape):
    initializer = get_initializer(self.initializer_range)
    self.mask_emb = self.add_weight(
        shape=(1, 1, self.d_model),
        initializer=initializer,
        trainable=True,
        name='mask_emb')
def build(self, input_shape):
    self.word_embeddings = self.add_weight(
        "weight",
        shape=[self.vocab_size, self.hidden_size],
        initializer=get_initializer(self.initializer_range))
    super(ElectraEmbeddings, self).build(input_shape)
def __init__(self, hparams, **kwargs):
    super(BertNSPTask, self).__init__(**kwargs)
    self.seq_relationship = tf.keras.layers.Dense(
        2,
        kernel_initializer=get_initializer(hparams.initializer_range),
        name='seq_relationship')