from blocks.bricks import Identity, Initializable, Linear
from blocks.bricks.base import application
from blocks.bricks.recurrent import LSTM
from blocks.initialization import Constant, IsotropicGaussian

# Project-local imports; the module paths below are assumptions and may need
# adjusting to match this repository's layout.
from compositional_layer import (BaselineLSTMCompositionalLayer,
                                 BidirectionalLSTMCompositionalLayer,
                                 LSTMCompositionalLayer)
from initialization import XavierInitializationOriginal

class LanguageModel(Initializable):
    """
    This takes the word embeddings from LSTMCompositionalLayer and creates sentence embeddings using a LSTM

    compositional_layer_type can be:
        1) 'BidirectionalLSTMCompositionalLayer'
        2) 'UnidirectionalLSTMCompositionalLayer'
        3) 'BaselineLSTMCompositionalLayer'

    Input is a 3d tensor with the dimensions of (num_words, num_subwords, batch_size) and
    a 3d tensor a mask of size (num_words, num_subwords, batch_size)

    All hidden state sizes are the same as the subword embedding size

    This returns a 3d tensor with dimensions of
    (num_words = num RNN states, batch_size, sentence embedding size = LM_RNN_hidden_state_size = subword_RNN_hidden_state_size * 2)
    """

    def __init__(self, batch_size, num_subwords, num_words, subword_embedding_size, input_vocab_size,
                 subword_RNN_hidden_state_size, LM_RNN_hidden_state_size, table_width=0.08,
                 compositional_layer_type='BidirectionalLSTMCompositionalLayer', init_type='xavier', **kwargs):

        super(LanguageModel, self).__init__(**kwargs)
        self.batch_size = batch_size
        self.num_subwords = num_subwords # number of subwords which make up a word
        self.num_words = num_words  # number of words in the sentence
        self.subword_embedding_size = subword_embedding_size
        self.input_vocab_size = input_vocab_size
        self.subword_RNN_hidden_state_size = subword_RNN_hidden_state_size  # i.e., the word embedding size
        self.LM_RNN_hidden_state_size = LM_RNN_hidden_state_size  # i.e., the sentence embedding size
        self.table_width = table_width

        self.name = 'Language_Model'

        # Default initializers; the bidirectional branch below overrides these
        # because its word embeddings are twice as wide (forward + backward).
        if init_type == 'xavier':
            linear_init = XavierInitializationOriginal(self.subword_RNN_hidden_state_size, self.LM_RNN_hidden_state_size)
            lstm_init = XavierInitializationOriginal(self.subword_RNN_hidden_state_size, self.LM_RNN_hidden_state_size)
        else:  # default is gaussian
            linear_init = IsotropicGaussian()
            lstm_init = IsotropicGaussian()

        self.compositional_layer = None
        self.linear = None
        if compositional_layer_type == 'BidirectionalLSTMCompositionalLayer':
            self.compositional_layer = BidirectionalLSTMCompositionalLayer(
                self.batch_size, self.num_subwords, self.num_words,
                self.subword_embedding_size, self.input_vocab_size,
                self.subword_RNN_hidden_state_size, self.table_width,
                init_type=init_type, name='compositional_layer')

            # The bidirectional layer concatenates forward and backward states,
            # so the input to the linear transform is twice as wide.
            if init_type == 'xavier':
                linear_init = XavierInitializationOriginal(self.subword_RNN_hidden_state_size * 2, self.LM_RNN_hidden_state_size)
                lstm_init = XavierInitializationOriginal(self.subword_RNN_hidden_state_size * 2, self.LM_RNN_hidden_state_size)
            else:  # default is gaussian
                linear_init = IsotropicGaussian()
                lstm_init = IsotropicGaussian()

            self.linear = Linear(input_dim=self.subword_RNN_hidden_state_size * 2,  # * 2 for the bidirectional states
                                 output_dim=self.LM_RNN_hidden_state_size * 4,  # * 4 for the LSTM gates
                                 name='linear', weights_init=linear_init, biases_init=Constant(0.0))

        elif compositional_layer_type == 'UnidirectionalLSTMCompositionalLayer':
            self.compositional_layer = LSTMCompositionalLayer(
                self.batch_size, self.num_subwords, self.num_words,
                self.subword_embedding_size, self.input_vocab_size,
                self.subword_RNN_hidden_state_size, self.table_width,
                init_type=init_type, name='compositional_layer')

            self.linear = Linear(input_dim=self.subword_RNN_hidden_state_size,
                                 output_dim=self.LM_RNN_hidden_state_size * 4,  # * 4 for the LSTM gates
                                 name='linear', weights_init=linear_init, biases_init=Constant(0.0))

        elif compositional_layer_type == 'BaselineLSTMCompositionalLayer':
            self.compositional_layer = BaselineLSTMCompositionalLayer(
                self.batch_size, self.num_subwords, self.num_words,
                self.subword_embedding_size, self.input_vocab_size,
                self.subword_RNN_hidden_state_size, self.table_width,
                init_type=init_type, name='compositional_layer')

            self.linear = Linear(input_dim=self.subword_RNN_hidden_state_size,
                                 output_dim=self.LM_RNN_hidden_state_size * 4,  # * 4 for the LSTM gates
                                 name='linear', weights_init=linear_init, biases_init=Constant(0.0))

        else:
            raise ValueError('compositional_layer_type = ' + compositional_layer_type + ' is invalid')

        # A single LSTM reads the word embeddings into a sentence embedding
        # (or partial sentence embeddings, one per word position).
        self.language_model_RNN = LSTM(
            dim=self.LM_RNN_hidden_state_size, activation=Identity(), name='language_model_RNN',
            weights_init=lstm_init, biases_init=Constant(0.0))

        self.children = [self.compositional_layer, self.linear, self.language_model_RNN]


    @application(inputs=['subword_id_input_', 'subword_id_input_mask_'], outputs=['sentence_embeddings', 'word_embeddings_mask'])
    def apply(self, subword_id_input_, subword_id_input_mask_):
        """
        subword_id_input_ is a 3d tensor with the dimensions of shape = (num_words, num_subwords, batch_size).
        It is expected as a dtype=uint16 or equivalent

        subword_id_input_mask_ is a 3d tensor with the dimensions of shape = (num_words, num_subwords, batch_size).
        It is expected as a dtype=uint8 or equivalent and has binary values of 1 when there is data and zero otherwise.

        Returned is a 3d tensor of size (num_words = num RNN states, batch_size, sentence embedding size)
        Also returned is a 1d tensor of size (batch_size) describing if the sentence is valid of empty in the batch
        """
        word_embeddings, word_embeddings_mask = self.compositional_layer.apply(subword_id_input_, subword_id_input_mask_)
        sentence_embeddings = self.language_model_RNN.apply(
            self.linear.apply(word_embeddings), mask=word_embeddings_mask)[0]  # [0] = hidden states, [1] = cells

        # sentence_embeddings_mask = word_embeddings_mask.max(axis=0).T

        return sentence_embeddings, word_embeddings_mask
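

# A minimal usage sketch (an assumption, not part of the original module): it
# builds the graph with Theano symbolic inputs matching the shapes and dtypes
# documented in LanguageModel.apply. The sizes below are illustrative only.
if __name__ == '__main__':
    import theano
    from theano import tensor

    lm = LanguageModel(batch_size=32, num_subwords=5, num_words=20,
                       subword_embedding_size=50, input_vocab_size=500,
                       subword_RNN_hidden_state_size=100,
                       LM_RNN_hidden_state_size=200,  # = subword size * 2 (bidirectional)
                       compositional_layer_type='BidirectionalLSTMCompositionalLayer')
    lm.initialize()  # triggers weights_init / biases_init on all children

    subword_ids = tensor.tensor3('subword_id_input_', dtype='uint16')
    subword_mask = tensor.tensor3('subword_id_input_mask_', dtype='uint8')
    sentence_embeddings, word_mask = lm.apply(subword_ids, subword_mask)
    f = theano.function([subword_ids, subword_mask],
                        [sentence_embeddings, word_mask])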