def attention_model(vocabulary_size, config_params, output_size,
                    pos_vocab_size, lex_vocab_size, visualize=False,
                    plot=False, tokenizer=None):
    """BiLSTM + self-attention multi-task model: one shared attention layer
    feeds three softmax heads (WSD over masked candidate synsets, POS, LEX)."""
    hidden_size = int(config_params['hidden_size'])
    batch_size = int(config_params['batch_size'])
    input_type = 'string' if tokenizer is not None else None

    in_sentences = Input(shape=(None,), dtype=input_type,
                         batch_size=batch_size)

    if tokenizer is not None:
        embedding = ElmoEmbeddingLayer()(in_sentences)
        embedding_size = 1024
    else:
        embedding_size = int(config_params['embedding_size'])
        embedding = Embedding(input_dim=vocabulary_size,
                              output_dim=embedding_size,
                              mask_zero=True,
                              name="Embeddings")(in_sentences)

    bilstm = Bidirectional(LSTM(hidden_size, dropout=0.2,
                                recurrent_dropout=0.2,
                                return_sequences=True,
                                input_shape=(None, None, embedding_size)),
                           merge_mode='sum')(embedding)

    attention = SeqSelfAttention(attention_activation='sigmoid',
                                 name='Attention')(bilstm)

    logits = TimeDistributed(Dense(output_size))(attention)

    in_mask = Input(shape=(None, output_size), batch_size=batch_size,
                    name='Candidate_Synsets_Mask')
    logits_mask = Add()([logits, in_mask])

    pos_logits = TimeDistributed(Dense(pos_vocab_size),
                                 name='POS_logits')(attention)
    lex_logits = TimeDistributed(Dense(lex_vocab_size),
                                 name='LEX_logits')(attention)

    wsd_output = Softmax(name="WSD_output")(logits_mask)
    pos_output = Softmax(name="POS_output")(pos_logits)
    lex_output = Softmax(name="LEX_output")(lex_logits)

    model = Model(inputs=[in_sentences, in_mask],
                  outputs=[wsd_output, pos_output, lex_output],
                  name='BiLSTM_ATT_MultiTask')

    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=Adadelta(), metrics=['acc'])

    visualize_plot_mdl(visualize, plot, model)

    return model
def attention_model(vocabulary_size, config_params, output_size, weights=None,
                    tokenizer=None, visualize=False, plot=False):
    """BiLSTM + self-attention model ('SensEmbed_Attention') with a single
    masked softmax over candidate synsets; supports ELMo inputs or
    pretrained embedding weights."""
    hidden_size = int(config_params['hidden_size'])
    batch_size = int(config_params['batch_size'])
    input_type = 'string' if tokenizer is not None else None

    in_sentences = Input(shape=(None,), dtype=input_type,
                         batch_size=batch_size)
    in_mask = Input(shape=(None, output_size), batch_size=batch_size,
                    name='Candidate_Synsets_Mask')

    if tokenizer is not None:
        embedding = ElmoEmbeddingLayer()(in_sentences)
        embedding_size = 1024
    elif weights is not None:
        embedding_size = weights.shape[1]
        train = False  # To fine-tune pretrained embeddings or not
        embedding = Embedding(input_dim=output_size,
                              output_dim=embedding_size,
                              weights=[weights], trainable=train,
                              mask_zero=True)(in_sentences)
    else:
        embedding_size = int(config_params['embedding_size'])
        embedding = Embedding(input_dim=vocabulary_size,
                              output_dim=embedding_size,
                              mask_zero=True,
                              name="Embeddings")(in_sentences)

    bilstm = Bidirectional(LSTM(hidden_size, dropout=0.2,
                                recurrent_dropout=0.2,
                                return_sequences=True,
                                input_shape=(None, None, embedding_size)),
                           merge_mode='sum')(embedding)

    attention = SeqSelfAttention(attention_activation='sigmoid',
                                 name='Attention')(bilstm)

    logits = TimeDistributed(Dense(output_size))(attention)
    logits_mask = Add()([logits, in_mask])
    output = Softmax()(logits_mask)

    model = Model(inputs=[in_sentences, in_mask], outputs=output,
                  name="SensEmbed_Attention")

    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=Adadelta(), metrics=['acc'])

    visualize_plot_mdl(visualize, plot, model)

    return model
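# Minimal usage sketch for the sense-embedding path of attention_model above,
# assuming the Keras imports and the ElmoEmbeddingLayer / visualize_plot_mdl
# helpers referenced by these functions are available in this module; the
# vocabulary sizes, hyperparameters and random weight matrix are illustrative
# assumptions, not values from the original project.
if __name__ == '__main__':
    import numpy as np

    config_params = {'hidden_size': 256, 'batch_size': 32,
                     'embedding_size': 128}
    vocabulary_size, output_size = 30000, 5000

    # Dummy pretrained embedding matrix: one row per output sense.
    sense_weights = np.random.uniform(-1.0, 1.0,
                                      size=(output_size, 400)).astype('float32')

    model = attention_model(vocabulary_size, config_params, output_size,
                            weights=sense_weights)
    model.summary()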
def baseline_model(vocabulary_size, config_params, output_size,
                   tokenizer=None, visualize=False, plot=False):
    """Baseline tagger: a single BiLSTM whose per-token logits are summed
    with the additive candidate-synsets mask before the softmax."""
    name = 'Baseline'
    hidden_size = int(config_params['hidden_size'])
    batch_size = int(config_params['batch_size'])
    input_type = 'string' if tokenizer is not None else None

    in_sentences = Input(shape=(None,), dtype=input_type,
                         batch_size=batch_size, name='Input')

    if tokenizer is not None:
        embedding = ElmoEmbeddingLayer()(in_sentences)
        embedding_size = 1024
        name = f'Elmo_{name}'
    else:
        embedding_size = int(config_params['embedding_size'])
        embedding = Embedding(input_dim=vocabulary_size,
                              output_dim=embedding_size,
                              mask_zero=True,
                              name="Embeddings")(in_sentences)

    bilstm = Bidirectional(LSTM(hidden_size, dropout=0.2,
                                recurrent_dropout=0.2,
                                return_sequences=True,
                                input_shape=(None, None, embedding_size)),
                           merge_mode='sum')(embedding)

    logits = TimeDistributed(Dense(output_size))(bilstm)

    in_mask = Input(shape=(None, output_size), batch_size=batch_size,
                    name='Candidate_Synsets_Mask')
    logits_mask = Add()([logits, in_mask])
    output = Softmax()(logits_mask)

    model = Model(inputs=[in_sentences, in_mask], outputs=output, name=name)

    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=Adadelta(), metrics=['acc'])

    visualize_plot_mdl(visualize, plot, model)

    return model
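# Minimal usage sketch for baseline_model above: build the model and run one
# dummy training step. The hyperparameters, vocabulary sizes and random batch
# are illustrative assumptions; the additive candidate-synsets mask is all
# zeros here, i.e. no senses are filtered out.
if __name__ == '__main__':
    import numpy as np

    config_params = {'hidden_size': 256, 'batch_size': 4,
                     'embedding_size': 128}
    vocabulary_size, output_size = 1000, 200

    model = baseline_model(vocabulary_size, config_params, output_size)

    sentences = np.random.randint(1, vocabulary_size, size=(4, 10))
    synset_mask = np.zeros((4, 10, output_size), dtype='float32')
    labels = np.random.randint(0, output_size, size=(4, 10, 1))

    model.fit([sentences, synset_mask], labels, batch_size=4, epochs=1)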
def baseline_model(vocabulary_size, config_params, output_size,
                   lex_output_size, dom_output_size, tokenizer=None,
                   visualize=False, plot=False):
    """Hierarchical multi-task model: LEX and DOM heads read the first
    BiLSTM, while the (masked) WSD head reads a second, stacked BiLSTM."""
    hidden_size = int(config_params['hidden_size'])
    batch_size = int(config_params['batch_size'])
    input_type = 'string' if tokenizer is not None else None

    in_sentences = Input(shape=(None,), dtype=input_type,
                         batch_size=batch_size, name='Input')

    if tokenizer is not None:
        embedding = ElmoEmbeddingLayer()(in_sentences)
        embedding_size = 1024
    else:
        embedding_size = int(config_params['embedding_size'])
        embedding = Embedding(input_dim=vocabulary_size,
                              output_dim=embedding_size,
                              mask_zero=True,
                              name="Embeddings")(in_sentences)

    bilstm = Bidirectional(LSTM(hidden_size, dropout=0.2,
                                recurrent_dropout=0.2,
                                return_sequences=True,
                                input_shape=(None, None, embedding_size)),
                           merge_mode='sum')(embedding)

    stacked_bilstm = Bidirectional(LSTM(hidden_size, dropout=0.2,
                                        recurrent_dropout=0.2,
                                        return_sequences=True,
                                        input_shape=(None, None, embedding_size)),
                                   merge_mode='sum')(bilstm)

    lex_logits = TimeDistributed(Dense(lex_output_size),
                                 name='LEX_logits')(bilstm)
    dom_logits = TimeDistributed(Dense(dom_output_size),
                                 name='DOM_logits')(bilstm)
    wsd_logits = TimeDistributed(Dense(output_size),
                                 name='WSD_logits')(stacked_bilstm)

    in_mask = Input(shape=(None, output_size), batch_size=batch_size,
                    name='Candidate_Synsets_Mask')
    logits_mask = Add()([wsd_logits, in_mask])

    wsd_output = Softmax(name="WSD_output")(logits_mask)
    lex_output = Softmax(name="LEX_output")(lex_logits)
    dom_output = Softmax(name="DOM_output")(dom_logits)

    model = Model(inputs=[in_sentences, in_mask],
                  outputs=[wsd_output, dom_output, lex_output],
                  name="Hierarchical")

    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=Adadelta(), metrics=['acc'])

    visualize_plot_mdl(visualize, plot, model)

    return model
def seq2seq_model(vocabulary_size, config_params, output_size,
                  pos_vocab_size, lex_vocab_size, tokenizer=None,
                  visualize=False, plot=False):
    """Seq2seq multi-task model: BiLSTM encoder with self-attention and a
    forward/backward LSTM decoder feeding WSD (masked), POS and LEX heads."""
    hidden_size = int(config_params['hidden_size'])
    batch_size = int(config_params['batch_size'])
    embedding_size = int(config_params['embedding_size'])
    input_type = 'string' if tokenizer is not None else None

    in_sentences = Input(shape=(None,), dtype=input_type,
                         batch_size=batch_size, name='Input')

    if tokenizer is not None:
        embeddings = ElmoEmbeddingLayer()(in_sentences)
        embedding_size = 1024
    else:
        embeddings = Embedding(input_dim=vocabulary_size,
                               output_dim=embedding_size,
                               mask_zero=True,
                               name="Embeddings")(in_sentences)

    bilstm, forward_h, _, backward_h, _ = Bidirectional(
        LSTM(hidden_size, return_sequences=True, return_state=True,
             dropout=0.2, recurrent_dropout=0.2,
             input_shape=(None, None, embedding_size)),
        merge_mode='sum', name='Encoder_BiLSTM')(embeddings)

    state_h = Concatenate()([forward_h, backward_h])

    encoder_attention = SeqSelfAttention(attention_activation='sigmoid',
                                         name='Attention')([bilstm, state_h])
    concat = Concatenate()([encoder_attention, bilstm])

    decoder_fwd_lstm, _, _ = LSTM(hidden_size, dropout=0.2,
                                  recurrent_dropout=0.2,
                                  return_sequences=True, return_state=True,
                                  input_shape=(None, None, embedding_size),
                                  name='Decoder_FWD_LSTM')(concat)

    decoder_bck_lstm, _, _ = LSTM(hidden_size, dropout=0.2,
                                  recurrent_dropout=0.2,
                                  return_sequences=True, return_state=True,
                                  input_shape=(None, None, embedding_size),
                                  go_backwards=True,
                                  name='Decoder_BWD_LSTM')(decoder_fwd_lstm)

    decoder_bilstm = Concatenate()([decoder_fwd_lstm, decoder_bck_lstm])

    logits = TimeDistributed(Dense(output_size),
                             name='WSD_logits')(decoder_bilstm)

    in_mask = Input(shape=(None, output_size), batch_size=batch_size,
                    name='Candidate_Synsets_Mask')
    logits_mask = Add(name='Masked_logits')([logits, in_mask])

    pos_logits = TimeDistributed(Dense(pos_vocab_size),
                                 name='POS_logits')(decoder_bilstm)
    lex_logits = TimeDistributed(Dense(lex_vocab_size),
                                 name='LEX_logits')(decoder_bilstm)

    wsd_output = Softmax(name="WSD_output")(logits_mask)
    pos_output = Softmax(name="POS_output")(pos_logits)
    lex_output = Softmax(name="LEX_output")(lex_logits)

    model = Model(inputs=[in_sentences, in_mask],
                  outputs=[wsd_output, pos_output, lex_output],
                  name='Seq2Seq_MultiTask')

    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=Adadelta(), metrics=['acc'])

    visualize_plot_mdl(visualize, plot, model)

    return model
def seq2seq_model(vocabulary_size, config_params, output_size, weights=None,
                  tokenizer=None, visualize=False, plot=False):
    """Seq2seq attention model ('SensEmbed_Seq2Seq_Attention'): two stacked
    BiLSTM encoders with self-attention and a forward/backward LSTM decoder,
    ending in a single masked softmax over candidate synsets."""
    drop, rdrop = 0.2, 0.2
    hidden_size = int(config_params['hidden_size'])
    batch_size = int(config_params['batch_size'])
    input_type = 'string' if tokenizer is not None else None

    encoder_inputs = Input(shape=(None,), dtype=input_type,
                           batch_size=batch_size)
    in_mask = Input(shape=(None, output_size), batch_size=batch_size,
                    name='Candidate_Synsets_Mask')

    if tokenizer is not None:
        encoder_embeddings = ElmoEmbeddingLayer()(encoder_inputs)
        embedding_size = 1024
    elif weights is not None:
        embedding_size = weights.shape[1]
        train = True  # To fine-tune pretrained embeddings or not
        encoder_embeddings = Embedding(input_dim=output_size,
                                       output_dim=embedding_size,
                                       weights=[weights], trainable=train,
                                       mask_zero=True)(encoder_inputs)
    else:
        embedding_size = int(config_params['embedding_size'])
        encoder_embeddings = Embedding(input_dim=vocabulary_size,
                                       output_dim=embedding_size,
                                       mask_zero=True,
                                       name="Embeddings")(encoder_inputs)

    encoder_bilstm = Bidirectional(
        LSTM(hidden_size, dropout=drop, recurrent_dropout=rdrop,
             return_sequences=True, return_state=True,
             input_shape=(None, None, embedding_size)),
        merge_mode='sum', name='Encoder_BiLSTM_1')(encoder_embeddings)

    encoder_bilstm2 = Bidirectional(
        LSTM(hidden_size, dropout=drop, recurrent_dropout=rdrop,
             return_sequences=True, return_state=True,
             input_shape=(None, None, embedding_size)),
        merge_mode='sum', name='Encoder_BiLSTM_2')

    (encoder_outputs, forward_h, forward_c,
     backward_h, backward_c) = encoder_bilstm2(encoder_bilstm)

    state_h = Concatenate()([forward_h, backward_h])
    state_c = Concatenate()([forward_c, backward_c])
    encoder_states = [state_h, state_c]

    encoder_attention = SeqSelfAttention(attention_activation='sigmoid',
                                         name='Attention')(encoder_outputs)

    decoder_fwd_lstm, _, _ = LSTM(hidden_size, dropout=drop,
                                  recurrent_dropout=rdrop,
                                  return_sequences=True, return_state=True,
                                  input_shape=(None, None, embedding_size),
                                  name='Decoder_FWD_LSTM')(
        encoder_attention, initial_state=[forward_h, backward_h])

    decoder_bck_lstm, _, _ = LSTM(hidden_size, dropout=drop,
                                  recurrent_dropout=rdrop,
                                  return_sequences=True, return_state=True,
                                  input_shape=(None, None, embedding_size),
                                  go_backwards=True,
                                  name='Decoder_BWD_LSTM')(decoder_fwd_lstm)

    decoder_bilstm = Concatenate()([decoder_fwd_lstm, decoder_bck_lstm])

    decoder_output = TimeDistributed(Dense(output_size),
                                     name='TimeDist_Dense')(decoder_bilstm)
    logits_mask = Add()([decoder_output, in_mask])
    decoder_outputs = Softmax()(logits_mask)

    model = Model([encoder_inputs, in_mask], outputs=decoder_outputs,
                  name="SensEmbed_Seq2Seq_Attention")

    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=Adadelta(), metrics=['acc'])

    visualize_plot_mdl(visualize, plot, model)

    return model
def multitask_baseline_model(vocabulary_size, config_params, output_size,
                             pos_vocab_size, lex_vocab_size, weights=None,
                             tokenizer=None, visualize=False, plot=False):
    """Multi-task BiLSTM baseline: a shared BiLSTM feeds WSD (masked by the
    candidate synsets), POS and LEX softmax heads; supports ELMo inputs or
    pretrained embedding weights."""
    hidden_size = int(config_params['hidden_size'])
    batch_size = int(config_params['batch_size'])
    input_type = 'string' if tokenizer is not None else None

    in_sentences = Input(shape=(None,), dtype=input_type,
                         batch_size=batch_size, name='Input')

    if tokenizer is not None:
        embedding = ElmoEmbeddingLayer()(in_sentences)
        embedding_size = 1024
    elif weights is not None:
        embedding_size = weights.shape[1]
        train = False  # To fine-tune pretrained embeddings or not
        embedding = Embedding(input_dim=output_size,
                              output_dim=embedding_size,
                              weights=[weights], trainable=train,
                              mask_zero=True)(in_sentences)
    else:
        embedding_size = int(config_params['embedding_size'])
        embedding = Embedding(input_dim=vocabulary_size,
                              output_dim=embedding_size,
                              mask_zero=True,
                              name="Embeddings")(in_sentences)

    bilstm = Bidirectional(LSTM(hidden_size, dropout=0.2,
                                recurrent_dropout=0.2,
                                return_sequences=True,
                                input_shape=(None, None, embedding_size)),
                           merge_mode='sum')(embedding)

    logits = TimeDistributed(Dense(output_size))(bilstm)

    in_mask = Input(shape=(None, output_size), batch_size=batch_size,
                    name='Candidate_Synsets_Mask')
    logits_mask = Add()([logits, in_mask])

    pos_logits = TimeDistributed(Dense(pos_vocab_size),
                                 name='POS_logits')(bilstm)
    lex_logits = TimeDistributed(Dense(lex_vocab_size),
                                 name='LEX_logits')(bilstm)

    wsd_output = Softmax(name="WSD_output")(logits_mask)
    pos_output = Softmax(name="POS_output")(pos_logits)
    lex_output = Softmax(name="LEX_output")(lex_logits)

    model = Model(inputs=[in_sentences, in_mask],
                  outputs=[wsd_output, pos_output, lex_output],
                  name='SensEmbed_BiLSTM_MultiTask')

    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=Adadelta(), metrics=['acc'])

    visualize_plot_mdl(visualize, plot, model)

    return model
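# Minimal usage sketch for multitask_baseline_model above: the model has three
# softmax heads, so fit() takes three label arrays in the same order as the
# outputs (WSD, POS, LEX). All sizes and data below are illustrative
# assumptions.
if __name__ == '__main__':
    import numpy as np

    config_params = {'hidden_size': 256, 'batch_size': 4,
                     'embedding_size': 128}
    vocabulary_size, output_size = 1000, 200
    pos_vocab_size, lex_vocab_size = 20, 45

    model = multitask_baseline_model(vocabulary_size, config_params,
                                     output_size, pos_vocab_size,
                                     lex_vocab_size)

    sentences = np.random.randint(1, vocabulary_size, size=(4, 10))
    synset_mask = np.zeros((4, 10, output_size), dtype='float32')
    wsd_labels = np.random.randint(0, output_size, size=(4, 10, 1))
    pos_labels = np.random.randint(0, pos_vocab_size, size=(4, 10, 1))
    lex_labels = np.random.randint(0, lex_vocab_size, size=(4, 10, 1))

    model.fit([sentences, synset_mask],
              [wsd_labels, pos_labels, lex_labels],
              batch_size=4, epochs=1)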