def __init__(self, config, ntags=None):
    # build input, directly feed with word embedding by the data generator
    word_input = Input(shape=(None, config.word_embedding_size), name='word_input')

    # build character based embedding
    char_input = Input(shape=(None, config.max_char_length), dtype='int32', name='char_input')
    char_embeddings = TimeDistributed(Embedding(input_dim=config.char_vocab_size,
                                                output_dim=config.char_embedding_size,
                                                #mask_zero=True,
                                                #embeddings_initializer=RandomUniform(minval=-0.5, maxval=0.5),
                                                name='char_embeddings'
                                                ))(char_input)

    chars = TimeDistributed(Bidirectional(LSTM(config.num_char_lstm_units, return_sequences=False)))(char_embeddings)

    # length of sequence not used for the moment (but used for f1 communication)
    length_input = Input(batch_shape=(None, 1), dtype='int32', name='length_input')

    # combine characters and word embeddings
    x = Concatenate()([word_input, chars])
    x = Dropout(config.dropout)(x)

    x = Bidirectional(LSTM(units=config.num_word_lstm_units,
                           return_sequences=True,
                           recurrent_dropout=config.recurrent_dropout))(x)
    x = Dropout(config.dropout)(x)
    x = Dense(config.num_word_lstm_units, activation='tanh')(x)
    x = Dense(ntags)(x)
    self.crf = ChainCRF()
    pred = self.crf(x)

    self.model = Model(inputs=[word_input, char_input, length_input], outputs=[pred])
    self.config = config
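
# Hedged usage sketch (not part of the original source): the three model inputs above
# imply the batch shapes below, assuming a hypothetical `config` with
# word_embedding_size=300 and max_char_length=30. The word channel carries precomputed
# embedding vectors (the data generator feeds them directly), the char channel carries
# padded character indices, and the length channel carries the true sequence lengths
# used only for evaluation bookkeeping.
import numpy as np

batch_size, max_seq_len = 2, 10                    # hypothetical sizes
word_embedding_size, max_char_length = 300, 30     # hypothetical config values

word_batch = np.zeros((batch_size, max_seq_len, word_embedding_size), dtype='float32')
char_batch = np.zeros((batch_size, max_seq_len, max_char_length), dtype='int32')
length_batch = np.full((batch_size, 1), max_seq_len, dtype='int32')
# model.predict([word_batch, char_batch, length_batch]) would then match the
# inputs=[word_input, char_input, length_input] ordering used above.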
def __init__(self, config, ntags=None):
    # build input, directly feed with word embedding by the data generator
    word_input = Input(shape=(None, config.word_embedding_size), name='word_input')

    # build character based embedding
    char_input = Input(shape=(None, config.max_char_length), dtype='int32', name='char_input')
    char_embeddings = TimeDistributed(Embedding(input_dim=config.char_vocab_size,
                                                output_dim=config.char_embedding_size,
                                                mask_zero=True,
                                                #embeddings_initializer=RandomUniform(minval=-0.5, maxval=0.5),
                                                name='char_embeddings'
                                                ))(char_input)

    chars = TimeDistributed(Bidirectional(LSTM(config.num_char_lstm_units, return_sequences=False)))(char_embeddings)

    # layout features input and embeddings
    features_input = Input(shape=(None, len(config.features_indices)), dtype='float32', name='features_input')

    # The input dimension is calculated by
    # features_vocabulary_size (default 12) * number_of_features + 1 (the zero is reserved for masking / padding)
    features_embedding = TimeDistributed(Embedding(input_dim=config.features_vocabulary_size * len(config.features_indices) + 1,
                                                   output_dim=config.features_embedding_size,
                                                   # mask_zero=True,
                                                   trainable=False,
                                                   name='features_embedding'),
                                         name="features_embedding_td")(features_input)

    features_embedding_bd = TimeDistributed(Bidirectional(LSTM(config.features_lstm_units, return_sequences=False)),
                                            name="features_embedding_td_2")(features_embedding)

    features_embedding_out = Dropout(config.dropout)(features_embedding_bd)

    # length of sequence not used for the moment (but used for f1 communication)
    length_input = Input(batch_shape=(None, 1), dtype='int32', name='length_input')

    # combine word, character and layout feature embeddings
    x = Concatenate()([word_input, chars, features_embedding_out])
    x = Dropout(config.dropout)(x)

    x = Bidirectional(LSTM(units=config.num_word_lstm_units,
                           return_sequences=True,
                           recurrent_dropout=config.recurrent_dropout))(x)
    x = Dropout(config.dropout)(x)
    x = Dense(config.num_word_lstm_units, activation='tanh')(x)
    x = Dense(ntags)(x)
    self.crf = ChainCRF()
    pred = self.crf(x)

    self.model = Model(inputs=[word_input, char_input, features_input, length_input], outputs=[pred])
    self.config = config
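
# Hedged sketch (not from the original source) of the feature embedding index space
# described in the comment above: with the default features_vocabulary_size of 12 and,
# say, 3 selected feature columns, the embedding has 12 * 3 + 1 = 37 rows, index 0 being
# reserved for masking / padding. One plausible mapping (an assumption about the data
# generator, which is not shown here) gives each feature column its own block of 12 indices.
features_vocabulary_size = 12      # default, per the comment above
num_features = 3                   # hypothetical len(config.features_indices)

input_dim = features_vocabulary_size * num_features + 1
assert input_dim == 37

def feature_to_index(feature_position, feature_value):
    # hypothetical helper: value `feature_value` (0..11) of the feature column at
    # `feature_position` (0..2) lands in that column's block, offset by 1 for padding
    return feature_position * features_vocabulary_size + feature_value + 1

assert 1 <= feature_to_index(0, 0) < input_dim
assert feature_to_index(2, 11) == input_dim - 1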
def __init__(self, config, ntags=None):
    # build input, directly feed with word embedding by the data generator
    word_input = Input(shape=(None, config.word_embedding_size), name='word_input')

    # build character based embedding
    char_input = Input(shape=(None, config.max_char_length), dtype='int32', name='char_input')
    char_embeddings = TimeDistributed(
        Embedding(input_dim=config.char_vocab_size,
                  output_dim=config.char_embedding_size,
                  mask_zero=True,
                  name='char_embeddings'
                  ))(char_input)

    dropout = Dropout(config.dropout)(char_embeddings)

    conv1d_out = TimeDistributed(Conv1D(kernel_size=3, filters=30, padding='same', activation='tanh', strides=1))(dropout)
    maxpool_out = TimeDistributed(GlobalMaxPooling1D())(conv1d_out)
    chars = Dropout(config.dropout)(maxpool_out)

    # custom casing features input and embeddings
    casing_input = Input(batch_shape=(None, None,), dtype='int32', name='casing_input')

    """
    casing_embedding = Embedding(input_dim=config.case_vocab_size,
                                 output_dim=config.case_embedding_size,
                                 mask_zero=True,
                                 trainable=False,
                                 name='casing_embedding')(casing_input)
    casing_embedding = Dropout(config.dropout)(casing_embedding)
    """

    # length of sequence not used for the moment (but used for f1 communication)
    length_input = Input(batch_shape=(None, 1), dtype='int32')

    # combine words and characters (the casing embedding above is currently disabled)
    x = Concatenate(axis=-1)([word_input, chars])
    x = Dropout(config.dropout)(x)

    x = Bidirectional(LSTM(units=config.num_word_lstm_units,
                           return_sequences=True,
                           recurrent_dropout=config.recurrent_dropout))(x)
    x = Dropout(config.dropout)(x)
    x = Dense(config.num_word_lstm_units, activation='tanh')(x)
    x = Dense(ntags)(x)
    self.crf = ChainCRF()
    pred = self.crf(x)

    self.model = Model(inputs=[word_input, char_input, casing_input, length_input], outputs=[pred])
    self.config = config
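
# Hedged shape walk-through (not from the original source) of the character CNN path
# above, using hypothetical sizes char_embedding_size=25 and max_char_length=30.
# TimeDistributed applies the Conv1D / GlobalMaxPooling1D pair to each token's character
# sequence, so every token ends up as a fixed 30-dim character vector (30 = number of
# Conv1D filters), independent of the sentence length.
from tensorflow.keras.layers import Input, TimeDistributed, Conv1D, GlobalMaxPooling1D
from tensorflow.keras.models import Model

emb = Input(shape=(None, 30, 25))                      # (batch, tokens, chars, char_embedding_size)
conv = TimeDistributed(Conv1D(kernel_size=3, filters=30, padding='same', activation='tanh'))(emb)
pooled = TimeDistributed(GlobalMaxPooling1D())(conv)   # (batch, tokens, 30): one vector per token
Model(emb, pooled).summary()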
def __init__(self, config: ModelConfig, ntags=None):
    super().__init__(
        config, ntags,
        require_casing=False, use_crf=True, supports_features=True,
        stateful=config.stateful
    )
    stateful = self.stateful
    # stateful RNNs require the batch size to be passed in
    input_batch_size = config.batch_size if stateful else None

    model_inputs = []
    lstm_inputs = []

    # build input, directly feed with word embedding by the data generator
    word_input = Input(
        shape=(None, config.word_embedding_size),
        batch_shape=(input_batch_size, None, config.word_embedding_size),
        name='word_input'
    )
    model_inputs.append(word_input)
    lstm_inputs.append(word_input)

    # build character based embedding
    char_input = Input(
        shape=(None, config.max_char_length),
        batch_shape=(input_batch_size, None, config.max_char_length),
        dtype='int32',
        name='char_input'
    )
    model_inputs.append(char_input)

    if config.char_embedding_size:
        assert config.char_vocab_size, 'config.char_vocab_size required'
        char_embeddings = TimeDistributed(Embedding(
            input_dim=config.char_vocab_size,
            output_dim=config.char_embedding_size,
            mask_zero=config.char_input_mask_zero,
            name='char_embeddings_embedding'
        ), name='char_embeddings')(char_input)

        chars = TimeDistributed(
            Bidirectional(LSTM(
                config.num_char_lstm_units,
                dropout=config.char_input_dropout,
                recurrent_dropout=config.char_lstm_dropout,
                return_sequences=False
            )),
            name='char_lstm'
        )(char_embeddings)
        lstm_inputs.append(chars)

    # length of sequence not used for the moment (but used for f1 communication)
    length_input = Input(batch_shape=(None, 1), dtype='int32', name='length_input')

    # combine characters and word embeddings
    LOGGER.debug('model, config.use_features: %s', config.use_features)
    if config.use_features:
        LOGGER.info('model using features')
        assert config.max_feature_size > 0
        features_input = Input(
            batch_shape=(input_batch_size, None, config.max_feature_size),
            name='features_input'
        )
        model_inputs.append(features_input)
        features = features_input
        if config.features_embedding_size:
            features = TimeDistributed(Dense(
                config.features_embedding_size,
                name='features_embeddings_dense'
            ), name='features_embeddings')(features)
        LOGGER.info(
            'word_input=%s, chars=%s, features=%s',
            word_input, chars, features
        )
        lstm_inputs.append(features)

    x = _concatenate_inputs(lstm_inputs, name='word_lstm_input')
    x = Dropout(config.dropout, name='word_lstm_input_dropout')(x)

    x = Bidirectional(LSTM(
        units=config.num_word_lstm_units,
        return_sequences=True,
        recurrent_dropout=config.recurrent_dropout,
        stateful=stateful,
    ), name='word_lstm')(x)
    x = Dropout(config.dropout, name='word_lstm_output_dropout')(x)
    x = Dense(
        config.num_word_lstm_units, name='word_lstm_dense', activation='tanh'
    )(x)
    x = Dense(ntags, name='dense_ntags')(x)
    self.crf = ChainCRF(name='crf')
    pred = self.crf(x)

    model_inputs.append(length_input)
    self.model = Model(inputs=model_inputs, outputs=[pred])
    self.config = config
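
# Hedged note (not from the original source) on the stateful branch above: a stateful
# LSTM carries its hidden state across successive batches, so Keras needs a fixed batch
# size baked into every Input, which is why `input_batch_size` switches between
# config.batch_size and None. The values below are purely illustrative.
stateful = True                   # hypothetical config.stateful
batch_size = 4                    # hypothetical config.batch_size
word_embedding_size = 300         # hypothetical config.word_embedding_size

input_batch_size = batch_size if stateful else None
word_batch_shape = (input_batch_size, None, word_embedding_size)
# stateful:     (4, None, 300)    -> fixed batch size, variable sequence length
# non-stateful: (None, None, 300) -> both batch size and sequence length are free
assert word_batch_shape == (4, None, 300)
# The resulting inputs list is ordered [word_input, char_input, (features_input), length_input];
# the data generator must yield batches in that same order.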