def __init__(self, config, w_embeddings=None, c_embeddings=None, pos_embeddings=None, ntags=None):
    # build word embedding
    word_ids = Input(batch_shape=(None, None), dtype='int32')
    if w_embeddings is None:
        word_embeddings = Embedding(input_dim=config.vocab_size,
                                    output_dim=config.word_embedding_size,
                                    mask_zero=True)(word_ids)
    else:
        word_embeddings = Embedding(input_dim=w_embeddings.shape[0],
                                    output_dim=w_embeddings.shape[1],
                                    mask_zero=True,
                                    weights=[w_embeddings])(word_ids)

    # word-level bidirectional LSTM over the sentence
    x = Bidirectional(
        LSTM(units=config.num_word_lstm_units, return_sequences=True))(word_embeddings)
    x = Dropout(config.dropout)(x)

    # project to tag space and decode with a linear-chain CRF
    x = Dense(ntags)(x)
    self.crf = ChainCRF()
    pred = self.crf(x)

    sequence_lengths = Input(batch_shape=(None, 1), dtype='int32')
    self.model = Model(inputs=[word_ids, sequence_lengths], outputs=[pred])
    self.config = config
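# Hedged usage sketch (not part of the original code): assuming the ChainCRF
# layer exposes a `loss` attribute, as the test at the end of this section uses,
# the word-only model built above could be compiled and fed a dummy batch like
# this. The function name and the batch dimensions are illustrative assumptions.
import numpy as np

def train_word_model_sketch(model_wrapper, config, ntags):
    # compile with the CRF's own loss; any standard Keras optimizer works
    model_wrapper.model.compile(loss=model_wrapper.crf.loss, optimizer='adam')

    batch_size, maxlen = 4, 10
    # random word ids; index 0 is left for padding because mask_zero=True
    word_ids = np.random.randint(1, config.vocab_size, size=(batch_size, maxlen))
    sequence_lengths = np.full((batch_size, 1), maxlen, dtype='int32')
    # one-hot tag targets with shape (batch_size, maxlen, ntags)
    tags = np.eye(ntags)[np.random.randint(ntags, size=(batch_size, maxlen))]

    model_wrapper.model.train_on_batch([word_ids, sequence_lengths], tags)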
def __init__(self, config, w_embeddings=None, c_embeddings=None, pos_embeddings=None, ntags=None):
    # build word embedding
    word_ids = Input(batch_shape=(None, None), dtype='int32')
    if w_embeddings is None:
        word_embeddings = Embedding(input_dim=config.vocab_size,
                                    output_dim=config.word_embedding_size,
                                    mask_zero=True)(word_ids)
    else:
        word_embeddings = Embedding(input_dim=w_embeddings.shape[0],
                                    output_dim=w_embeddings.shape[1],
                                    mask_zero=True,
                                    weights=[w_embeddings])(word_ids)

    # build character-based word embedding
    char_ids = Input(batch_shape=(None, None, None), dtype='int32')
    char_embeddings = Embedding(input_dim=config.char_vocab_size,
                                output_dim=config.char_embedding_size,
                                mask_zero=True)(char_ids)
    s = K.shape(char_embeddings)
    char_embeddings = Lambda(lambda x: K.reshape(
        x, shape=(-1, s[-2], config.char_embedding_size)))(char_embeddings)
    fwd_state = LSTM(config.num_char_lstm_units, return_state=True)(char_embeddings)[-2]
    bwd_state = LSTM(config.num_char_lstm_units, return_state=True,
                     go_backwards=True)(char_embeddings)[-2]
    char_embeddings = Concatenate(axis=-1)([fwd_state, bwd_state])
    # shape = (batch size, max sentence length, char hidden size)
    char_embeddings = Lambda(lambda x: K.reshape(
        x, shape=[-1, s[1], 2 * config.num_char_lstm_units]))(char_embeddings)

    hand_feature = Input(batch_shape=(None, None, config.hand_feature_size), dtype='float32')

    # combine characters and word
    x = Concatenate(axis=-1)([word_embeddings, char_embeddings])
    x = Dropout(config.dropout)(x)
    x = Bidirectional(
        LSTM(units=config.num_word_lstm_units, return_sequences=True))(x)
    x = Dropout(config.dropout)(x)
    x = Concatenate(axis=-1)([x, hand_feature])
    x = Dense(ntags)(x)
    self.crf = ChainCRF()
    pred = self.crf(x)

    sequence_lengths = Input(batch_shape=(None, 1), dtype='int32')
    self.model = Model(
        inputs=[word_ids, char_ids, hand_feature, sequence_lengths],
        outputs=[pred])
    self.config = config
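# Hedged sketch (illustrative, not from the original source): the model above is
# wired to four inputs, so a batch has to be assembled in the same order the
# Model(...) call lists them. The helper name and the dummy dimensions below are
# assumptions; only the shapes follow from the Input(...) definitions above.
import numpy as np

def build_dummy_batch_sketch(config, batch_size=4, max_sent_len=10, max_word_len=8):
    # word ids: (batch, max sentence length)
    word_ids = np.random.randint(1, config.vocab_size, size=(batch_size, max_sent_len))
    # char ids: (batch, max sentence length, max word length)
    char_ids = np.random.randint(1, config.char_vocab_size,
                                 size=(batch_size, max_sent_len, max_word_len))
    # hand-crafted features: (batch, max sentence length, hand_feature_size)
    hand_feature = np.random.rand(batch_size, max_sent_len,
                                  config.hand_feature_size).astype('float32')
    # true length of each sentence: (batch, 1)
    sequence_lengths = np.full((batch_size, 1), max_sent_len, dtype='int32')
    return [word_ids, char_ids, hand_feature, sequence_lengths]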
def test_chain_crf(self):
    vocab_size = 20
    n_classes = 11
    model = Sequential()
    model.add(Embedding(vocab_size, n_classes))
    layer = ChainCRF()
    model.add(layer)
    model.compile(loss=layer.loss, optimizer='sgd')

    # Train first mini batch
    batch_size, maxlen = 2, 2
    x = np.random.randint(1, vocab_size, size=(batch_size, maxlen))
    y = np.random.randint(n_classes, size=(batch_size, maxlen))
    y = np.eye(n_classes)[y]
    model.train_on_batch(x, y)
    print(x)
    print(y)
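# Hedged follow-up sketch (an assumed extra test, not part of the original):
# a shape check on the same tiny model. Taking np.argmax over the last axis
# recovers one tag index per token whether the ChainCRF layer returns decoded
# one-hot vectors or raw per-class scores at inference time.
def test_chain_crf_predict_shape_sketch(self):
    vocab_size = 20
    n_classes = 11
    model = Sequential()
    model.add(Embedding(vocab_size, n_classes))
    layer = ChainCRF()
    model.add(layer)
    model.compile(loss=layer.loss, optimizer='sgd')

    batch_size, maxlen = 2, 2
    x = np.random.randint(1, vocab_size, size=(batch_size, maxlen))
    pred = model.predict(x)
    # the CRF layer keeps the (batch, timesteps, n_classes) shape of its input
    assert pred.shape == (batch_size, maxlen, n_classes)
    tags = np.argmax(pred, axis=-1)
    assert tags.shape == (batch_size, maxlen)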