class LanguageModel(Initializable):
    """
    Takes the word embeddings from a compositional layer and creates sentence embeddings using an LSTM.

    compositional_layer_type can be:
    1) 'BidirectionalLSTMCompositionalLayer'
    2) 'UnidirectionalLSTMCompositionalLayer'
    3) 'BaselineLSTMCompositionalLayer'

    Input is a 3d tensor of subword ids with dimensions (num_words, num_subwords, batch_size)
    and a 3d mask tensor of the same size (num_words, num_subwords, batch_size).

    All hidden state sizes are the same as the subword embedding size.

    Returns a 3d tensor with dimensions (num_words = num RNN states, batch_size,
    sentence embedding size = LM_RNN_hidden_state_size = subword_RNN_hidden_state_size * 2).
    """

    def __init__(self, batch_size, num_subwords, num_words, subword_embedding_size, input_vocab_size,
                 subword_RNN_hidden_state_size, LM_RNN_hidden_state_size, table_width=0.08,
                 compositional_layer_type='BidirectionalLSTMCompositionalLayer', init_type='xavier', **kwargs):

        super(LanguageModel, self).__init__(**kwargs)
        self.batch_size = batch_size
        self.num_subwords = num_subwords  # number of subwords which make up a word
        self.num_words = num_words  # number of words in the sentence
        self.subword_embedding_size = subword_embedding_size
        self.input_vocab_size = input_vocab_size
        self.subword_RNN_hidden_state_size = subword_RNN_hidden_state_size  # i.e. word embedding size
        self.LM_RNN_hidden_state_size = LM_RNN_hidden_state_size  # i.e. sentence embedding size
        self.table_width = table_width
        self.name = 'Language_Model'

        # linear_init / lstm_init are prepared according to init_type; note that the Linear and
        # LSTM bricks constructed below use IsotropicGaussian weights regardless of init_type.
        if init_type == 'xavier':
            linear_init = XavierInitializationOriginal(self.subword_RNN_hidden_state_size,
                                                       self.LM_RNN_hidden_state_size)
            lstm_init = XavierInitializationOriginal(self.subword_RNN_hidden_state_size,
                                                     self.LM_RNN_hidden_state_size)
        else:  # default is gaussian
            linear_init = IsotropicGaussian()
            lstm_init = IsotropicGaussian()

        self.compositional_layer = None
        self.linear = None

        if compositional_layer_type == 'BidirectionalLSTMCompositionalLayer':
            self.compositional_layer = BidirectionalLSTMCompositionalLayer(
                self.batch_size, self.num_subwords, self.num_words, self.subword_embedding_size,
                self.input_vocab_size, self.subword_RNN_hidden_state_size, self.table_width,
                init_type=init_type, name='compositional_layer')

            # the bidirectional layer outputs embeddings of twice the hidden state size
            if init_type == 'xavier':
                linear_init = XavierInitializationOriginal(self.subword_RNN_hidden_state_size * 2,
                                                           self.LM_RNN_hidden_state_size)
                lstm_init = XavierInitializationOriginal(self.subword_RNN_hidden_state_size * 2,
                                                         self.LM_RNN_hidden_state_size)
            else:  # default is gaussian
                linear_init = IsotropicGaussian()
                lstm_init = IsotropicGaussian()

            self.linear = Linear(input_dim=self.subword_RNN_hidden_state_size * 2,  # 2 * for the bidirectional
                                 output_dim=self.LM_RNN_hidden_state_size * 4,
                                 name='linear',
                                 weights_init=IsotropicGaussian(),
                                 biases_init=Constant(0.0))

        elif compositional_layer_type == 'UnidirectionalLSTMCompositionalLayer':
            self.compositional_layer = LSTMCompositionalLayer(
                self.batch_size, self.num_subwords, self.num_words, self.subword_embedding_size,
                self.input_vocab_size, self.subword_RNN_hidden_state_size, self.table_width,
                init_type=init_type, name='compositional_layer')

            self.linear = Linear(input_dim=self.subword_RNN_hidden_state_size,
                                 output_dim=self.LM_RNN_hidden_state_size * 4,
                                 name='linear',
                                 weights_init=IsotropicGaussian(),
                                 biases_init=Constant(0.0))

        elif compositional_layer_type == 'BaselineLSTMCompositionalLayer':
            self.compositional_layer = BaselineLSTMCompositionalLayer(
                self.batch_size, self.num_subwords, self.num_words, self.subword_embedding_size,
                self.input_vocab_size, self.subword_RNN_hidden_state_size, self.table_width,
                init_type=init_type, name='compositional_layer')

            self.linear = Linear(input_dim=self.subword_RNN_hidden_state_size,
                                 output_dim=self.LM_RNN_hidden_state_size * 4,
                                 name='linear',
                                 weights_init=IsotropicGaussian(),
                                 biases_init=Constant(0.0))

        else:
            raise ValueError('compositional_layer_type = ' + compositional_layer_type + ' is invalid')

        # one RNN which reads the word embeddings into a sentence embedding, or partial sentence embeddings
        self.language_model_RNN = LSTM(
            dim=self.LM_RNN_hidden_state_size, activation=Identity(), name='language_model_RNN',
            weights_init=IsotropicGaussian(), biases_init=Constant(0.0))

        self.children = [self.compositional_layer, self.linear, self.language_model_RNN]

    @application(inputs=['subword_id_input_', 'subword_id_input_mask_'],
                 outputs=['sentence_embeddings', 'word_embeddings_mask'])
    def apply(self, subword_id_input_, subword_id_input_mask_):
        """
        subword_id_input_ is a 3d tensor of shape (num_words, num_subwords, batch_size).
        It is expected as a dtype=uint16 or equivalent.

        subword_id_input_mask_ is a 3d tensor of shape (num_words, num_subwords, batch_size).
        It is expected as a dtype=uint8 or equivalent, with binary values of 1 where there is data and 0 otherwise.

        Returns a 3d tensor of size (num_words = num RNN states, batch_size, sentence embedding size).
        Also returns word_embeddings_mask, a mask indicating which word positions in each
        sentence of the batch are valid rather than padding.
        """
        word_embeddings, word_embeddings_mask = self.compositional_layer.apply(
            subword_id_input_, subword_id_input_mask_)

        sentence_embeddings = self.language_model_RNN.apply(
            self.linear.apply(word_embeddings),
            mask=word_embeddings_mask)[0]  # [0] = hidden states, [1] = cells

        # sentence_embeddings_mask = word_embeddings_mask.max(axis=0).T

        return sentence_embeddings, word_embeddings_mask
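

# A minimal usage sketch (illustrative only): it assumes the compositional-layer classes and
# XavierInitializationOriginal referenced above are defined in this module, and that Theano and
# Blocks are installed. All sizes are hypothetical example values, chosen so that
# LM_RNN_hidden_state_size equals 2 * subword_RNN_hidden_state_size as the class docstring describes.
if __name__ == '__main__':
    import theano
    from theano import tensor

    lm = LanguageModel(batch_size=32, num_subwords=5, num_words=20,
                       subword_embedding_size=50, input_vocab_size=500,
                       subword_RNN_hidden_state_size=50, LM_RNN_hidden_state_size=100,
                       compositional_layer_type='BidirectionalLSTMCompositionalLayer',
                       init_type='xavier')
    lm.initialize()

    # Symbolic inputs of shape (num_words, num_subwords, batch_size), as the apply docstring expects.
    subword_ids = tensor.tensor3('subword_id_input_', dtype='uint16')
    subword_ids_mask = tensor.tensor3('subword_id_input_mask_', dtype='uint8')

    sentence_embeddings, word_embeddings_mask = lm.apply(subword_ids, subword_ids_mask)

    # The symbolic outputs can then be compiled, e.g. into a Theano function.
    get_embeddings = theano.function([subword_ids, subword_ids_mask], sentence_embeddings)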