def multitask_attention_model(output_size, pos_vocab_size, lex_vocab_size,
                              config_params, visualize=False, plot=False):
    """BERT + BiLSTM + self-attention model with WSD, POS and LEX heads."""
    hidden_size = int(config_params['hidden_size'])
    batch_size = int(config_params['batch_size'])
    embedding_size = 768
    max_seq_len = 512

    in_id = Input(shape=(max_seq_len,), name="input_ids")
    in_mask = Input(shape=(max_seq_len,), name="input_masks")
    in_segment = Input(shape=(max_seq_len,), name="segment_ids")
    bert_inputs = [in_id, in_mask, in_segment]

    bert_output_ = BertEmbeddingLayer(n_fine_tune_layers=3,
                                      pooling="mean")(bert_inputs)
    bert_output = Reshape((max_seq_len, embedding_size))(bert_output_)

    # Candidate-synsets mask; kept distinct from the BERT attention mask above.
    in_synset_mask = Input(shape=(None, output_size), batch_size=batch_size,
                           name='Candidate_Synsets_Mask')
    bert_inputs.append(in_synset_mask)

    bilstm = Bidirectional(LSTM(hidden_size, dropout=0.2,
                                recurrent_dropout=0.2,
                                return_sequences=True,
                                input_shape=(None, None, embedding_size)),
                           merge_mode='sum')(bert_output)
    attention = SeqSelfAttention(units=128, attention_activation='sigmoid',
                                 name='Attention')(bilstm)

    logits = TimeDistributed(Dense(output_size))(attention)
    logits_mask = Add()([logits, in_synset_mask])
    pos_logits = TimeDistributed(Dense(pos_vocab_size),
                                 name='POS_logits')(attention)
    lex_logits = TimeDistributed(Dense(lex_vocab_size),
                                 name='LEX_logits')(attention)

    wsd_output = Softmax(name="WSD_output")(logits_mask)
    pos_output = Softmax(name="POS_output")(pos_logits)
    lex_output = Softmax(name="LEX_output")(lex_logits)

    model = Model(inputs=bert_inputs,
                  outputs=[wsd_output, pos_output, lex_output],
                  name='Bert_BiLSTM_ATT_MultiTask')
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=Adadelta(), metrics=['acc'])

    visualize_plot_mdl(visualize, plot, model)

    return model

def attention_model(vocabulary_size, config_params, output_size,
                    pos_vocab_size, lex_vocab_size, visualize=False,
                    plot=False, tokenizer=None):
    hidden_size = int(config_params['hidden_size'])
    batch_size = int(config_params['batch_size'])
    input_type = 'string' if tokenizer is not None else None

    in_sentences = Input(shape=(None,), dtype=input_type,
                         batch_size=batch_size)

    if tokenizer is not None:
        embedding = ElmoEmbeddingLayer()(in_sentences)
        embedding_size = 1024
    else:
        embedding_size = int(config_params['embedding_size'])
        embedding = Embedding(input_dim=vocabulary_size,
                              output_dim=embedding_size,
                              mask_zero=True,
                              name="Embeddings")(in_sentences)

    bilstm = Bidirectional(LSTM(hidden_size, dropout=0.2,
                                recurrent_dropout=0.2,
                                return_sequences=True,
                                input_shape=(None, None, embedding_size)),
                           merge_mode='sum')(embedding)
    attention = SeqSelfAttention(attention_activation='sigmoid',
                                 name='Attention')(bilstm)

    logits = TimeDistributed(Dense(output_size))(attention)
    in_mask = Input(shape=(None, output_size), batch_size=batch_size,
                    name='Candidate_Synsets_Mask')
    logits_mask = Add()([logits, in_mask])
    pos_logits = TimeDistributed(Dense(pos_vocab_size),
                                 name='POS_logits')(attention)
    lex_logits = TimeDistributed(Dense(lex_vocab_size),
                                 name='LEX_logits')(attention)

    wsd_output = Softmax(name="WSD_output")(logits_mask)
    pos_output = Softmax(name="POS_output")(pos_logits)
    lex_output = Softmax(name="LEX_output")(lex_logits)

    model = Model(inputs=[in_sentences, in_mask],
                  outputs=[wsd_output, pos_output, lex_output],
                  name='BiLSTM_ATT_MultiTask')
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=Adadelta(), metrics=['acc'])

    visualize_plot_mdl(visualize, plot, model)

    return model

def attention_model(vocabulary_size, config_params, output_size, weights=None,
                    tokenizer=None, visualize=False, plot=False):
    """BiLSTM + self-attention WSD model over sense embeddings."""
    hidden_size = int(config_params['hidden_size'])
    batch_size = int(config_params['batch_size'])
    input_type = 'string' if tokenizer is not None else None

    in_sentences = Input(shape=(None,), dtype=input_type,
                         batch_size=batch_size)
    in_mask = Input(shape=(None, output_size), batch_size=batch_size,
                    name='Candidate_Synsets_Mask')

    if tokenizer is not None:
        embedding = ElmoEmbeddingLayer()(in_sentences)
        embedding_size = 1024
    elif weights is not None:
        embedding_size = weights.shape[1]
        train = False  # whether to fine-tune the pretrained embeddings
        embedding = Embedding(input_dim=output_size,
                              output_dim=embedding_size,
                              weights=[weights],
                              trainable=train,
                              mask_zero=True)(in_sentences)
    else:
        embedding_size = int(config_params['embedding_size'])
        embedding = Embedding(input_dim=vocabulary_size,
                              output_dim=embedding_size,
                              mask_zero=True,
                              name="Embeddings")(in_sentences)

    bilstm = Bidirectional(LSTM(hidden_size, dropout=0.2,
                                recurrent_dropout=0.2,
                                return_sequences=True,
                                input_shape=(None, None, embedding_size)),
                           merge_mode='sum')(embedding)
    attention = SeqSelfAttention(attention_activation='sigmoid',
                                 name='Attention')(bilstm)

    logits = TimeDistributed(Dense(output_size))(attention)
    logits_mask = Add()([logits, in_mask])
    output = Softmax()(logits_mask)

    model = Model(inputs=[in_sentences, in_mask], outputs=output,
                  name="SensEmbed_Attention")
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=Adadelta(), metrics=['acc'])

    visualize_plot_mdl(visualize, plot, model)

    return model

def baseline_model(vocabulary_size, config_params, output_size,
                   tokenizer=None, visualize=False, plot=False):
    name = 'Baseline'
    hidden_size = int(config_params['hidden_size'])
    batch_size = int(config_params['batch_size'])
    input_type = 'string' if tokenizer is not None else None

    in_sentences = Input(shape=(None,), dtype=input_type,
                         batch_size=batch_size, name='Input')

    if tokenizer is not None:
        embedding = ElmoEmbeddingLayer()(in_sentences)
        embedding_size = 1024
        name = f'Elmo_{name}'
    else:
        embedding_size = int(config_params['embedding_size'])
        embedding = Embedding(input_dim=vocabulary_size,
                              output_dim=embedding_size,
                              mask_zero=True,
                              name="Embeddings")(in_sentences)

    bilstm = Bidirectional(LSTM(hidden_size, dropout=0.2,
                                recurrent_dropout=0.2,
                                return_sequences=True,
                                input_shape=(None, None, embedding_size)),
                           merge_mode='sum')(embedding)
    logits = TimeDistributed(Dense(output_size))(bilstm)

    in_mask = Input(shape=(None, output_size), batch_size=batch_size,
                    name='Candidate_Synsets_Mask')
    logits_mask = Add()([logits, in_mask])
    output = Softmax()(logits_mask)

    model = Model(inputs=[in_sentences, in_mask], outputs=output, name=name)
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=Adadelta(), metrics=['acc'])

    visualize_plot_mdl(visualize, plot, model)

    return model

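# A minimal NumPy sketch (illustrative values, not taken from the training
# code) of the candidate-synsets masking used by the models in this module:
# the mask contributes 0 to the logits of candidate senses and a very large
# negative value elsewhere, so the softmax puts virtually all probability
# mass on the candidates. The helper name is hypothetical.
def _masked_softmax_demo():
    import numpy as np

    logits = np.array([2.0, 0.5, 1.0, -0.3])
    candidate_mask = np.array([0.0, -1e9, 0.0, -1e9])   # senses 0 and 2 allowed
    masked = logits + candidate_mask                     # Add()([logits, in_mask])
    exp = np.exp(masked - masked.max())
    return exp / exp.sum()                               # Softmax()(logits_mask)
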
def attention_model(output_size, max_seq_len, config_params,
                    visualize=False, plot=False):
    """BERT + BiLSTM + self-attention WSD model."""
    embedding_size = 768
    hidden_size = int(config_params['hidden_size'])
    batch_size = int(config_params['batch_size'])

    in_id = Input(shape=(max_seq_len,), name="input_ids")
    in_mask = Input(shape=(max_seq_len,), name="input_masks")
    in_segment = Input(shape=(max_seq_len,), name="segment_ids")
    bert_inputs = [in_id, in_mask, in_segment]

    bert_output_ = BertEmbeddingLayer(n_fine_tune_layers=3,
                                      pooling="mean")(bert_inputs)
    bert_output = Reshape((max_seq_len, embedding_size))(bert_output_)

    bilstm = Bidirectional(LSTM(hidden_size, dropout=0.2,
                                recurrent_dropout=0.2,
                                return_sequences=True))(bert_output)
    attention = SeqSelfAttention(attention_activation='sigmoid',
                                 name='Attention')(bilstm)
    logits = TimeDistributed(Dense(output_size))(attention)

    # Candidate-synsets mask; kept distinct from the BERT attention mask above.
    in_synset_mask = Input(shape=(None, output_size), batch_size=batch_size,
                           name='Candidate_Synsets_Mask')
    bert_inputs.append(in_synset_mask)

    logits_mask = Add()([logits, in_synset_mask])
    output = Softmax()(logits_mask)

    mdl = Model(inputs=bert_inputs, outputs=output,
                name="Bert_Attention_BiLSTM")
    mdl.compile(loss="sparse_categorical_crossentropy",
                optimizer=Adadelta(), metrics=['acc'])

    visualize_plot_mdl(visualize, plot, mdl)

    return mdl

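# A hedged sketch of the shapes the three BERT inputs above expect: one
# (batch, max_seq_len) integer array each for token ids, attention mask and
# segment ids. The zero/one fillers are placeholders only; real values would
# come from a WordPiece tokenizer, which is outside this snippet. The helper
# name and default sizes are hypothetical.
def _bert_input_placeholders(batch_size=8, max_seq_len=512):
    import numpy as np

    input_ids = np.zeros((batch_size, max_seq_len), dtype=np.int32)
    input_masks = np.ones((batch_size, max_seq_len), dtype=np.int32)
    segment_ids = np.zeros((batch_size, max_seq_len), dtype=np.int32)
    return [input_ids, input_masks, segment_ids]
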
def multitask_seq2seq_model(output_size, pos_vocab_size, lex_vocab_size,
                            config_params, visualize=False, plot=False):
    hidden_size = int(config_params['hidden_size'])
    batch_size = int(config_params['batch_size'])
    embedding_size = 768
    max_seq_len = 512

    in_id = Input(shape=(max_seq_len,), name="input_ids")
    in_mask = Input(shape=(max_seq_len,), name="input_masks")
    in_segment = Input(shape=(max_seq_len,), name="segment_ids")
    bert_inputs_ = [in_id, in_mask, in_segment]

    bert_output_ = BertEmbeddingLayer(n_fine_tune_layers=3,
                                      pooling="mean")(bert_inputs_)
    bert_output = Reshape((max_seq_len, embedding_size))(bert_output_)

    input_mask = Input(shape=(None, output_size), batch_size=batch_size,
                       name='Candidate_Synsets_Mask')
    bert_inputs_.append(input_mask)

    bilstm, forward_h, _, backward_h, _ = Bidirectional(
        LSTM(hidden_size, return_sequences=True, return_state=True,
             dropout=0.2, recurrent_dropout=0.2,
             input_shape=(None, None, embedding_size)),
        merge_mode='sum', name='Encoder_BiLSTM')(bert_output)

    state_h = Concatenate()([forward_h, backward_h])
    context = SeqSelfAttention(units=128)([bilstm, state_h])
    concat = Concatenate()([bilstm, context])

    decoder_fwd_lstm = LSTM(hidden_size, dropout=0.2, recurrent_dropout=0.2,
                            return_sequences=True,
                            input_shape=(None, None, embedding_size),
                            name='Decoder_FWD_LSTM')(concat)
    decoder_bck_lstm = LSTM(hidden_size, dropout=0.2, recurrent_dropout=0.2,
                            return_sequences=True,
                            input_shape=(None, None, embedding_size),
                            go_backwards=True,
                            name='Decoder_BWD_LSTM')(decoder_fwd_lstm)
    decoder_bilstm = Concatenate()([decoder_fwd_lstm, decoder_bck_lstm])

    logits = TimeDistributed(Dense(output_size),
                             name='WSD_logits')(decoder_bilstm)
    logits_mask = Add(name="Masked_logits")([logits, input_mask])
    pos_logits = TimeDistributed(Dense(pos_vocab_size),
                                 name='POS_logits')(decoder_bilstm)
    lex_logits = TimeDistributed(Dense(lex_vocab_size),
                                 name='LEX_logits')(decoder_bilstm)

    wsd_output = Softmax(name="WSD_output")(logits_mask)
    pos_output = Softmax(name="POS_output")(pos_logits)
    lex_output = Softmax(name="LEX_output")(lex_logits)

    model = Model(inputs=bert_inputs_,
                  outputs=[wsd_output, pos_output, lex_output],
                  name='Bert_Attention_Seq2Seq_MultiTask')
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=Adadelta(), metrics=['accuracy'])

    visualize_plot_mdl(visualize, plot, model)

    return model

def seq2seq_model(output_size, max_seq_len, config_params,
                  visualize=False, plot=False):
    """BERT + stacked BiLSTM encoder with attentive LSTM decoder for WSD."""
    drop, rdrop = 0.2, 0.2
    embedding_size = 768
    hidden_size = int(config_params['hidden_size'])
    batch_size = int(config_params['batch_size'])

    in_id = Input(shape=(max_seq_len,), name="input_ids")
    in_mask = Input(shape=(max_seq_len,), name="input_masks")
    in_segment = Input(shape=(max_seq_len,), name="segment_ids")
    bert_inputs = [in_id, in_mask, in_segment]

    bert_output_ = BertEmbeddingLayer(n_fine_tune_layers=3,
                                      pooling="mean")(bert_inputs)
    bert_output = Reshape((max_seq_len, embedding_size))(bert_output_)

    # Candidate-synsets mask; kept distinct from the BERT attention mask above.
    in_synset_mask = Input(shape=(None, output_size), batch_size=batch_size,
                           name='Candidate_Synsets_Mask')
    bert_inputs.append(in_synset_mask)

    # Only the sequence output of the first encoder feeds the second one.
    encoder_outputs_1, _, _, _, _ = Bidirectional(
        LSTM(hidden_size, dropout=drop, recurrent_dropout=rdrop,
             return_sequences=True, return_state=True,
             input_shape=(None, None, embedding_size)),
        merge_mode='sum', name='Encoder_BiLSTM_1')(bert_output)

    encoder_bilstm2 = Bidirectional(
        LSTM(hidden_size, dropout=drop, recurrent_dropout=rdrop,
             return_sequences=True, return_state=True,
             input_shape=(None, None, embedding_size)),
        merge_mode='sum', name='Encoder_BiLSTM_2')
    (encoder_outputs, forward_h, forward_c,
     backward_h, backward_c) = encoder_bilstm2(encoder_outputs_1)

    state_h = Concatenate()([forward_h, backward_h])
    state_c = Concatenate()([forward_c, backward_c])
    encoder_states = [state_h, state_c]

    encoder_attention = SeqSelfAttention(attention_activation='sigmoid',
                                         name='Attention')(encoder_outputs)

    decoder_fwd_lstm, _, _ = LSTM(hidden_size, dropout=drop,
                                  recurrent_dropout=rdrop,
                                  return_sequences=True, return_state=True,
                                  input_shape=(None, None, embedding_size),
                                  name='Decoder_FWD_LSTM')(
        encoder_attention, initial_state=[forward_h, backward_h])
    decoder_bck_lstm, _, _ = LSTM(hidden_size, dropout=drop,
                                  recurrent_dropout=rdrop,
                                  return_sequences=True, return_state=True,
                                  input_shape=(None, None, embedding_size),
                                  go_backwards=True,
                                  name='Decoder_BWD_LSTM')(decoder_fwd_lstm)
    decoder_bilstm = Concatenate()([decoder_fwd_lstm, decoder_bck_lstm])

    decoder_output = TimeDistributed(Dense(output_size),
                                     name='TimeDist_Dense')(decoder_bilstm)
    logits_mask = Add()([decoder_output, in_synset_mask])
    decoder_outputs = Softmax()(logits_mask)

    model = Model(bert_inputs, outputs=decoder_outputs,
                  name="Bert_Attention_Seq2Seq")
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=Adadelta(), metrics=['acc'])

    visualize_plot_mdl(visualize, plot, model)

    return model

def baseline_model(vocabulary_size, config_params, output_size,
                   lex_output_size, dom_output_size, tokenizer=None,
                   visualize=False, plot=False):
    hidden_size = int(config_params['hidden_size'])
    batch_size = int(config_params['batch_size'])
    input_type = 'string' if tokenizer is not None else None

    in_sentences = Input(shape=(None,), dtype=input_type,
                         batch_size=batch_size, name='Input')

    if tokenizer is not None:
        embedding = ElmoEmbeddingLayer()(in_sentences)
        embedding_size = 1024
    else:
        embedding_size = int(config_params['embedding_size'])
        embedding = Embedding(input_dim=vocabulary_size,
                              output_dim=embedding_size,
                              mask_zero=True,
                              name="Embeddings")(in_sentences)

    bilstm = Bidirectional(LSTM(hidden_size, dropout=0.2,
                                recurrent_dropout=0.2,
                                return_sequences=True,
                                input_shape=(None, None, embedding_size)),
                           merge_mode='sum')(embedding)
    stacked_bilstm = Bidirectional(LSTM(hidden_size, dropout=0.2,
                                        recurrent_dropout=0.2,
                                        return_sequences=True,
                                        input_shape=(None, None,
                                                     embedding_size)),
                                   merge_mode='sum')(bilstm)

    lex_logits = TimeDistributed(Dense(lex_output_size),
                                 name='LEX_logits')(bilstm)
    dom_logits = TimeDistributed(Dense(dom_output_size),
                                 name='DOM_logits')(bilstm)
    wsd_logits = TimeDistributed(Dense(output_size),
                                 name='WSD_logits')(stacked_bilstm)

    in_mask = Input(shape=(None, output_size), batch_size=batch_size,
                    name='Candidate_Synsets_Mask')
    logits_mask = Add()([wsd_logits, in_mask])

    wsd_output = Softmax(name="WSD_output")(logits_mask)
    lex_output = Softmax(name="LEX_output")(lex_logits)
    dom_output = Softmax(name="DOM_output")(dom_logits)

    model = Model(inputs=[in_sentences, in_mask],
                  outputs=[wsd_output, dom_output, lex_output],
                  name="Hierarchical")
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=Adadelta(), metrics=['acc'])

    visualize_plot_mdl(visualize, plot, model)

    return model

def seq2seq_model(vocabulary_size, config_params, output_size, pos_vocab_size,
                  lex_vocab_size, tokenizer=None, visualize=False, plot=False):
    """Attentive encoder-decoder model with WSD, POS and LEX heads."""
    hidden_size = int(config_params['hidden_size'])
    batch_size = int(config_params['batch_size'])
    embedding_size = int(config_params['embedding_size'])
    input_type = 'string' if tokenizer is not None else None

    in_sentences = Input(shape=(None,), dtype=input_type,
                         batch_size=batch_size, name='Input')

    if tokenizer is not None:
        embeddings = ElmoEmbeddingLayer()(in_sentences)
        embedding_size = 1024
    else:
        embeddings = Embedding(input_dim=vocabulary_size,
                               output_dim=embedding_size,
                               mask_zero=True,
                               name="Embeddings")(in_sentences)

    bilstm, forward_h, _, backward_h, _ = Bidirectional(
        LSTM(hidden_size, return_sequences=True, return_state=True,
             dropout=0.2, recurrent_dropout=0.2,
             input_shape=(None, None, embedding_size)),
        merge_mode='sum', name='Encoder_BiLSTM')(embeddings)

    state_h = Concatenate()([forward_h, backward_h])
    encoder_attention = SeqSelfAttention(attention_activation='sigmoid',
                                         name='Attention')([bilstm, state_h])
    concat = Concatenate()([encoder_attention, bilstm])

    # return_state=True so that the (output, h, c) unpacking below is valid.
    decoder_fwd_lstm, _, _ = LSTM(hidden_size, dropout=0.2,
                                  recurrent_dropout=0.2,
                                  return_sequences=True, return_state=True,
                                  input_shape=(None, None, embedding_size),
                                  name='Decoder_FWD_LSTM')(concat)
    decoder_bck_lstm, _, _ = LSTM(hidden_size, dropout=0.2,
                                  recurrent_dropout=0.2,
                                  return_sequences=True, return_state=True,
                                  input_shape=(None, None, embedding_size),
                                  go_backwards=True,
                                  name='Decoder_BWD_LSTM')(decoder_fwd_lstm)
    decoder_bilstm = Concatenate()([decoder_fwd_lstm, decoder_bck_lstm])

    logits = TimeDistributed(Dense(output_size),
                             name='WSD_logits')(decoder_bilstm)
    in_mask = Input(shape=(None, output_size), batch_size=batch_size,
                    name='Candidate_Synsets_Mask')
    logits_mask = Add(name='Masked_logits')([logits, in_mask])
    pos_logits = TimeDistributed(Dense(pos_vocab_size),
                                 name='POS_logits')(decoder_bilstm)
    lex_logits = TimeDistributed(Dense(lex_vocab_size),
                                 name='LEX_logits')(decoder_bilstm)

    wsd_output = Softmax(name="WSD_output")(logits_mask)
    pos_output = Softmax(name="POS_output")(pos_logits)
    lex_output = Softmax(name="LEX_output")(lex_logits)

    model = Model(inputs=[in_sentences, in_mask],
                  outputs=[wsd_output, pos_output, lex_output],
                  name='Seq2Seq_MultiTask')
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=Adadelta(), metrics=['acc'])

    visualize_plot_mdl(visualize, plot, model)

    return model

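# A small self-contained sketch (shown with tf.keras; the module's own Keras
# imports behave the same way) of why the encoder BiLSTM above unpacks five
# tensors: with return_state=True, Bidirectional returns the merged sequence
# plus the forward (h, c) and backward (h, c) states. The sizes and helper
# name are arbitrary, chosen only for the demo.
def _bidirectional_state_demo():
    import numpy as np
    from tensorflow.keras.layers import (Input as KInput, LSTM as KLSTM,
                                         Bidirectional as KBidirectional)
    from tensorflow.keras.models import Model as KModel

    seq_in = KInput(shape=(None, 8))
    outputs = KBidirectional(KLSTM(4, return_sequences=True,
                                   return_state=True),
                             merge_mode='sum')(seq_in)
    demo = KModel(seq_in, outputs)

    sequence, fwd_h, fwd_c, bwd_h, bwd_c = demo.predict(
        np.random.rand(2, 5, 8))
    # sequence: (2, 5, 4) after merge_mode='sum'; each state: (2, 4)
    return sequence.shape, fwd_h.shape, fwd_c.shape, bwd_h.shape, bwd_c.shape
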
def seq2seq_model(vocabulary_size, config_params, output_size, weights=None,
                  tokenizer=None, visualize=False, plot=False):
    """Stacked BiLSTM encoder with attentive LSTM decoder over sense embeddings."""
    drop, rdrop = 0.2, 0.2
    hidden_size = int(config_params['hidden_size'])
    batch_size = int(config_params['batch_size'])
    input_type = 'string' if tokenizer is not None else None

    encoder_inputs = Input(shape=(None,), dtype=input_type,
                           batch_size=batch_size)
    in_mask = Input(shape=(None, output_size), batch_size=batch_size,
                    name='Candidate_Synsets_Mask')

    if tokenizer is not None:
        encoder_embeddings = ElmoEmbeddingLayer()(encoder_inputs)
        embedding_size = 1024
    elif weights is not None:
        embedding_size = weights.shape[1]
        train = True  # whether to fine-tune the pretrained embeddings
        encoder_embeddings = Embedding(input_dim=output_size,
                                       output_dim=embedding_size,
                                       weights=[weights],
                                       trainable=train,
                                       mask_zero=True)(encoder_inputs)
    else:
        embedding_size = int(config_params['embedding_size'])
        encoder_embeddings = Embedding(input_dim=vocabulary_size,
                                       output_dim=embedding_size,
                                       mask_zero=True,
                                       name="Embeddings")(encoder_inputs)

    # Only the sequence output of the first encoder feeds the second one.
    encoder_outputs_1, _, _, _, _ = Bidirectional(
        LSTM(hidden_size, dropout=drop, recurrent_dropout=rdrop,
             return_sequences=True, return_state=True,
             input_shape=(None, None, embedding_size)),
        merge_mode='sum', name='Encoder_BiLSTM_1')(encoder_embeddings)

    encoder_bilstm2 = Bidirectional(
        LSTM(hidden_size, dropout=drop, recurrent_dropout=rdrop,
             return_sequences=True, return_state=True,
             input_shape=(None, None, embedding_size)),
        merge_mode='sum', name='Encoder_BiLSTM_2')
    (encoder_outputs, forward_h, forward_c,
     backward_h, backward_c) = encoder_bilstm2(encoder_outputs_1)

    state_h = Concatenate()([forward_h, backward_h])
    state_c = Concatenate()([forward_c, backward_c])
    encoder_states = [state_h, state_c]

    encoder_attention = SeqSelfAttention(attention_activation='sigmoid',
                                         name='Attention')(encoder_outputs)

    decoder_fwd_lstm, _, _ = LSTM(hidden_size, dropout=drop,
                                  recurrent_dropout=rdrop,
                                  return_sequences=True, return_state=True,
                                  input_shape=(None, None, embedding_size),
                                  name='Decoder_FWD_LSTM')(
        encoder_attention, initial_state=[forward_h, backward_h])
    decoder_bck_lstm, _, _ = LSTM(hidden_size, dropout=drop,
                                  recurrent_dropout=rdrop,
                                  return_sequences=True, return_state=True,
                                  input_shape=(None, None, embedding_size),
                                  go_backwards=True,
                                  name='Decoder_BWD_LSTM')(decoder_fwd_lstm)
    decoder_bilstm = Concatenate()([decoder_fwd_lstm, decoder_bck_lstm])

    decoder_output = TimeDistributed(Dense(output_size),
                                     name='TimeDist_Dense')(decoder_bilstm)
    logits_mask = Add()([decoder_output, in_mask])
    decoder_outputs = Softmax()(logits_mask)

    model = Model([encoder_inputs, in_mask], outputs=decoder_outputs,
                  name="SensEmbed_Seq2Seq_Attention")
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=Adadelta(), metrics=['acc'])

    visualize_plot_mdl(visualize, plot, model)

    return model

def multitask_baseline_model(vocabulary_size, config_params, output_size,
                             pos_vocab_size, lex_vocab_size, weights=None,
                             tokenizer=None, visualize=False, plot=False):
    hidden_size = int(config_params['hidden_size'])
    batch_size = int(config_params['batch_size'])
    input_type = 'string' if tokenizer is not None else None

    in_sentences = Input(shape=(None,), dtype=input_type,
                         batch_size=batch_size, name='Input')

    if tokenizer is not None:
        embedding = ElmoEmbeddingLayer()(in_sentences)
        embedding_size = 1024
    elif weights is not None:
        embedding_size = weights.shape[1]
        train = False  # To fine-tune pretrained embeddings or not
        embedding = Embedding(input_dim=output_size,
                              output_dim=embedding_size,
                              weights=[weights],
                              trainable=train,
                              mask_zero=True)(in_sentences)
    else:
        embedding_size = int(config_params['embedding_size'])
        embedding = Embedding(input_dim=vocabulary_size,
                              output_dim=embedding_size,
                              mask_zero=True,
                              name="Embeddings")(in_sentences)

    bilstm = Bidirectional(LSTM(hidden_size, dropout=0.2,
                                recurrent_dropout=0.2,
                                return_sequences=True,
                                input_shape=(None, None, embedding_size)),
                           merge_mode='sum')(embedding)

    logits = TimeDistributed(Dense(output_size))(bilstm)
    in_mask = Input(shape=(None, output_size), batch_size=batch_size,
                    name='Candidate_Synsets_Mask')
    logits_mask = Add()([logits, in_mask])
    pos_logits = TimeDistributed(Dense(pos_vocab_size),
                                 name='POS_logits')(bilstm)
    lex_logits = TimeDistributed(Dense(lex_vocab_size),
                                 name='LEX_logits')(bilstm)

    wsd_output = Softmax(name="WSD_output")(logits_mask)
    pos_output = Softmax(name="POS_output")(pos_logits)
    lex_output = Softmax(name="LEX_output")(lex_logits)

    model = Model(inputs=[in_sentences, in_mask],
                  outputs=[wsd_output, pos_output, lex_output],
                  name='SensEmbed_BiLSTM_MultiTask')
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=Adadelta(), metrics=['acc'])

    visualize_plot_mdl(visualize, plot, model)

    return model

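# A hedged usage sketch for the builders in this module: the hyper-parameter
# values, vocabulary sizes and array names below are hypothetical placeholders,
# not values taken from the original project.
def _multitask_baseline_usage_demo():
    config_params = {'hidden_size': 256, 'batch_size': 32,
                     'embedding_size': 128}
    model = multitask_baseline_model(vocabulary_size=30000,
                                     config_params=config_params,
                                     output_size=25000,
                                     pos_vocab_size=20,
                                     lex_vocab_size=50)
    # Training would feed the two inputs (token ids and the candidate-synsets
    # mask) and one integer-label array per softmax head, e.g.:
    # model.fit([train_x, train_mask],
    #           [train_wsd_y, train_pos_y, train_lex_y],
    #           batch_size=int(config_params['batch_size']),
    #           epochs=epochs)
    return model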