Example #1
0
    def __init__(self,
                 n_in,
                 hidden_layer_size,
                 n_out,
                 L1_reg,
                 L2_reg,
                 hidden_layer_type,
                 output_type='LINEAR',
                 network_type='S2S',
                 ed_type='HED',
                 dropout_rate=0.0,
                 optimizer='sgd',
                 MLU_div_lengths=[],
                 loss_function='MMSE',
                 rnn_batch_training=False):
        """Build the symbolic graph of a (sequence-to-sequence) neural network.

        The constructor creates the symbolic inputs, stacks one layer per
        entry of ``hidden_layer_type``, adds an output layer (unless the last
        hidden layer is a decoder) and defines ``self.finetune_cost`` and
        ``self.errors`` for the selected loss.

        :param n_in: Dimensionality of input features
        :type n_in: Integer
        :param hidden_layer_size: The layer size for each hidden layer
        :type hidden_layer_size: A list of integers
        :param n_out: Dimensionality of output features
        :type n_out: Integer
        :param L1_reg: the L1 regularisation weight (only stored here)
        :param L2_reg: the L2 regularisation weight (only stored here)
        :param hidden_layer_type: the type of each hidden layer, e.g., TANH,
            LSTM, GRU, BLSTM; must have the same length as
            ``hidden_layer_size``. A layer listed in ``Encoder_variants``
            makes the *following* layer receive a duration-encoded input.
        :param output_type: the activation type of the output layer, by
            default 'LINEAR' (linear regression); 'RECURRENT' and the names in
            ``self.list_of_activations`` are also accepted (case-insensitive).
        :param network_type: when equal to "S2S" the symbolic inputs
            ``self.d`` (int vector) and ``self.f`` (matrix) are created.
            Encoder layers read both, so they require "S2S".
        :param ed_type: encoder-decoder flavour applied after an encoder
            layer: "VED" (vanilla) or "HED" (hierarchical). Any other value
            leaves the layer input unchanged.
        :param dropout_rate: probability of dropout, a float number between 0
            and 1.
        :param optimizer: optimizer name; only stored on ``self.optimizer``.
        :param MLU_div_lengths: column boundaries into ``self.f``; in "HED"
            mode the k-th encoder uses columns
            ``MLU_div_lengths[k]:MLU_div_lengths[k + 1]``. The list is only
            read, never modified.
        :param loss_function: 'CCE', 'Hinge' or 'MMSE'. For any other value
            ``self.finetune_cost`` and ``self.errors`` are left undefined.
        :param rnn_batch_training: if True, ``self.x`` and ``self.y`` are
            3-D tensors instead of matrices and the flag is forwarded to the
            recurrent layers.
        """

        logger = logging.getLogger("DNN initialization")

        self.n_in = int(n_in)
        self.n_out = int(n_out)

        self.n_layers = len(hidden_layer_size)

        self.dropout_rate = dropout_rate
        self.optimizer = optimizer
        self.loss_function = loss_function
        self.is_train = T.iscalar('is_train')
        self.rnn_batch_training = rnn_batch_training

        assert len(hidden_layer_size) == len(hidden_layer_type)

        self.list_of_activations = [
            'TANH', 'SIGMOID', 'SOFTMAX', 'RELU', 'RESU'
        ]

        # Bidirectional layers emit twice their nominal size.
        BLSTM_variants = ['BLSTM', 'BSLSTM', 'BLSTME', 'BSLSTME']
        Encoder_variants = ['RNNE', 'LSTME', 'BLSTME', 'SLSTME', 'TANHE']
        Decoder_variants = ['RNND', 'LSTMD', 'SLSTMD']

        if self.rnn_batch_training:
            self.x = T.tensor3('x')
            self.y = T.tensor3('y')
        else:
            self.x = T.matrix('x')
            self.y = T.matrix('y')

        if network_type == "S2S":
            self.d = T.ivector('d')
            self.f = T.matrix('f')

        self.L1_reg = L1_reg
        self.L2_reg = L2_reg

        self.rnn_layers = []
        self.params = []
        self.delta_params = []

        rng = np.random.RandomState(123)

        prev_seg_end = 0  # start offset into self.d for the next HED encoder
        encoder_count = 0  # number of HED encoders processed so far
        MLU_div = MLU_div_lengths
        for i in range(self.n_layers):
            layer_type = hidden_layer_type[i]
            layer_size = hidden_layer_size[i]

            if i == 0:
                input_size = n_in
                layer_input = self.x
            else:
                input_size = hidden_layer_size[i - 1]
                if hidden_layer_type[i - 1] in BLSTM_variants:
                    input_size = hidden_layer_size[i - 1] * 2
                layer_input = self.rnn_layers[i - 1].output

            ### sequence-to-sequence mapping ###
            # Only a real previous layer can be an encoder. Without the
            # ``i > 0`` guard, index -1 would wrap around to the LAST layer
            # and wrongly encode the raw network input.
            if i > 0 and hidden_layer_type[i - 1] in Encoder_variants:
                dur_input = self.d
                frame_feat_input = self.f

                # vanilla encoder-decoder (phone-level features)
                if ed_type == "VED":
                    seq2seq_model = DistributedSequenceEncoder(
                        rng, layer_input, dur_input)
                    layer_input = T.concatenate(
                        (seq2seq_model.encoded_output, frame_feat_input),
                        axis=1)
                    # NOTE(review): assumes self.f has exactly 4 columns
                    # -- TODO confirm against the caller.
                    input_size = input_size + 4
                # hierarchical encoder-decoder
                elif ed_type == "HED":
                    # element count divided by row width: the number of rows
                    # produced by the encoder layer
                    seg_len = layer_input.size // input_size
                    seg_dur_input = dur_input[prev_seg_end:prev_seg_end +
                                              seg_len]
                    num_of_segs = T.sum(seg_dur_input)
                    seq2seq_model = DistributedSequenceEncoder(
                        rng, layer_input, seg_dur_input)
                    feat_start = MLU_div[encoder_count]
                    feat_end = MLU_div[encoder_count + 1]
                    addfeat_input = frame_feat_input[0:num_of_segs,
                                                     feat_start:feat_end]
                    layer_input = T.concatenate(
                        (seq2seq_model.encoded_output, addfeat_input), axis=1)
                    input_size = input_size + (feat_end - feat_start)
                    prev_seg_end = prev_seg_end + seg_len
                    encoder_count = encoder_count + 1

            # hidden layer activation
            # (an if/elif chain keeps the layer class names lazily resolved:
            # only the class of a requested layer type has to be available)
            if layer_type in self.list_of_activations:
                hidden_activation = layer_type.lower()
                hidden_layer = GeneralLayer(rng,
                                            layer_input,
                                            input_size,
                                            layer_size,
                                            activation=hidden_activation,
                                            p=self.dropout_rate,
                                            training=self.is_train)
            elif layer_type in ('TANHE', 'SIGMOIDE'):
                # drop the trailing 'E' to obtain the activation name
                hidden_activation = layer_type[:-1].lower()
                hidden_layer = GeneralLayer(rng,
                                            layer_input,
                                            input_size,
                                            layer_size,
                                            activation=hidden_activation,
                                            p=self.dropout_rate,
                                            training=self.is_train)
            elif layer_type == 'TANH_LHUC':
                hidden_layer = SigmoidLayer_LHUC(rng,
                                                 layer_input,
                                                 input_size,
                                                 layer_size,
                                                 activation=T.tanh,
                                                 p=self.dropout_rate,
                                                 training=self.is_train)
            elif layer_type in ('SLSTM', 'SLSTME'):
                hidden_layer = SimplifiedLstm(
                    rng,
                    layer_input,
                    input_size,
                    layer_size,
                    p=self.dropout_rate,
                    training=self.is_train,
                    rnn_batch_training=self.rnn_batch_training)
            elif layer_type == 'SLSTMD':
                hidden_layer = SimplifiedLstmDecoder(
                    rng,
                    layer_input,
                    input_size,
                    layer_size,
                    self.n_out,
                    p=self.dropout_rate,
                    training=self.is_train,
                    rnn_batch_training=self.rnn_batch_training)
            elif layer_type == 'SGRU':
                hidden_layer = SimplifiedGRU(
                    rng,
                    layer_input,
                    input_size,
                    layer_size,
                    p=self.dropout_rate,
                    training=self.is_train,
                    rnn_batch_training=self.rnn_batch_training)
            elif layer_type == 'GRU':
                hidden_layer = GatedRecurrentUnit(
                    rng,
                    layer_input,
                    input_size,
                    layer_size,
                    p=self.dropout_rate,
                    training=self.is_train,
                    rnn_batch_training=self.rnn_batch_training)
            elif layer_type in ('LSTM', 'LSTME'):
                hidden_layer = VanillaLstm(
                    rng,
                    layer_input,
                    input_size,
                    layer_size,
                    p=self.dropout_rate,
                    training=self.is_train,
                    rnn_batch_training=self.rnn_batch_training)
            elif layer_type == 'LSTMD':
                hidden_layer = VanillaLstmDecoder(
                    rng,
                    layer_input,
                    input_size,
                    layer_size,
                    self.n_out,
                    p=self.dropout_rate,
                    training=self.is_train,
                    rnn_batch_training=self.rnn_batch_training)
            elif layer_type in ('BSLSTM', 'BSLSTME'):
                hidden_layer = BidirectionSLstm(
                    rng,
                    layer_input,
                    input_size,
                    layer_size,
                    layer_size,
                    p=self.dropout_rate,
                    training=self.is_train,
                    rnn_batch_training=self.rnn_batch_training)
            elif layer_type in ('BLSTM', 'BLSTME'):
                hidden_layer = BidirectionLstm(
                    rng,
                    layer_input,
                    input_size,
                    layer_size,
                    layer_size,
                    p=self.dropout_rate,
                    training=self.is_train,
                    rnn_batch_training=self.rnn_batch_training)
            elif layer_type in ('RNN', 'RNNE'):
                hidden_layer = VanillaRNN(
                    rng,
                    layer_input,
                    input_size,
                    layer_size,
                    p=self.dropout_rate,
                    training=self.is_train,
                    rnn_batch_training=self.rnn_batch_training)
            elif layer_type == 'RNND':
                hidden_layer = VanillaRNNDecoder(
                    rng,
                    layer_input,
                    input_size,
                    layer_size,
                    self.n_out,
                    p=self.dropout_rate,
                    training=self.is_train,
                    rnn_batch_training=self.rnn_batch_training)
            elif layer_type == 'LSTM_LHUC':
                hidden_layer = VanillaLstm_LHUC(
                    rng,
                    layer_input,
                    input_size,
                    layer_size,
                    p=self.dropout_rate,
                    training=self.is_train,
                    rnn_batch_training=self.rnn_batch_training)
            else:
                logger.critical(
                    "This hidden layer type: %s is not supported right now! \n Please use one of the following: SLSTM, BSLSTM, TANH, SIGMOID\n"
                    % (layer_type))
                sys.exit(1)

            self.rnn_layers.append(hidden_layer)
            self.params.extend(hidden_layer.params)

        # Width of the last hidden layer's output, fed to the output layer.
        input_size = hidden_layer_size[-1]
        if hidden_layer_type[-1] in BLSTM_variants:
            input_size = hidden_layer_size[-1] * 2

        if hidden_layer_type[-1] in Decoder_variants:
            # The decoder layer was given n_out and doubles as the output
            # layer; its parameters are already in self.params.
            self.final_layer = self.rnn_layers[-1]
        else:
            output_activation = output_type.lower()
            if output_activation == 'linear':
                self.final_layer = LinearLayer(rng, self.rnn_layers[-1].output,
                                               input_size, self.n_out)
            elif output_activation == 'recurrent':
                self.final_layer = RecurrentOutputLayer(
                    rng,
                    self.rnn_layers[-1].output,
                    input_size,
                    self.n_out,
                    rnn_batch_training=self.rnn_batch_training)
            elif output_type.upper() in self.list_of_activations:
                self.final_layer = GeneralLayer(rng,
                                                self.rnn_layers[-1].output,
                                                input_size,
                                                self.n_out,
                                                activation=output_activation)
            else:
                logger.critical(
                    "This output layer type: %s is not supported right now! \n Please use one of the following: LINEAR, BSLSTM\n"
                    % (output_type))
                sys.exit(1)

            self.params.extend(self.final_layer.params)

        # One zero-initialised shared variable per parameter, same shape.
        self.updates = {}
        for param in self.params:
            self.updates[param] = theano.shared(
                value=np.zeros(param.get_value(borrow=True).shape,
                               dtype=theano.config.floatX),
                name='updates')

        if self.loss_function == 'CCE':
            self.finetune_cost = self.categorical_crossentropy_loss(
                self.final_layer.output, self.y)
            self.errors = self.categorical_crossentropy_loss(
                self.final_layer.output, self.y)
        elif self.loss_function == 'Hinge':
            self.finetune_cost = self.multiclass_hinge_loss(
                self.final_layer.output, self.y)
            self.errors = self.multiclass_hinge_loss(self.final_layer.output,
                                                     self.y)
        elif self.loss_function == 'MMSE':
            if self.rnn_batch_training:
                # Flatten targets and predictions to rows of n_out values.
                self.y_mod = T.reshape(self.y, (-1, n_out))
                self.final_layer_output = T.reshape(self.final_layer.output,
                                                    (-1, n_out))

                # Keep only rows whose target has at least one non-zero
                # value (presumably this drops zero padding -- TODO confirm).
                nonzero_rows = T.any(self.y_mod, 1).nonzero()

                self.y_mod = self.y_mod[nonzero_rows]
                self.final_layer_output = self.final_layer_output[nonzero_rows]

                self.finetune_cost = T.mean(
                    T.sum((self.final_layer_output - self.y_mod)**2, axis=1))
                self.errors = T.mean(
                    T.sum((self.final_layer_output - self.y_mod)**2, axis=1))
            else:
                self.finetune_cost = T.mean(
                    T.sum((self.final_layer.output - self.y)**2, axis=1))
                self.errors = T.mean(
                    T.sum((self.final_layer.output - self.y)**2, axis=1))
Example #2
0
    def __init__(self,
                 n_in,
                 hidden_layer_size,
                 n_out,
                 L1_reg,
                 L2_reg,
                 hidden_layer_type,
                 output_type='LINEAR',
                 network_type='DNN',
                 dropout_rate=0.0,
                 loss_function='CCE'):
        """Build the symbolic graph of a feed-forward / recurrent network.

        The constructor creates the symbolic inputs, stacks one layer per
        entry of ``hidden_layer_type``, adds an output layer (unless the last
        hidden layer is a decoder) and defines ``self.finetune_cost`` and
        ``self.errors`` for the selected loss.

        :param n_in: Dimensionality of input features
        :type n_in: Integer
        :param hidden_layer_size: The layer size for each hidden layer
        :type hidden_layer_size: A list of integers
        :param n_out: Dimensionality of output features
        :type n_out: Integer
        :param L1_reg: the L1 regularisation weight (only stored here)
        :param L2_reg: the L2 regularisation weight (only stored here)
        :param hidden_layer_type: the type of each hidden layer, e.g., TANH,
            LSTM, GRU, BLSTM; must have the same length as
            ``hidden_layer_size``. A layer listed in ``Encoder_variants``
            makes the *following* layer receive a duration-encoded input.
        :param output_type: the activation type of the output layer:
            'LINEAR' (default, linear regression), 'SOFTMAX' or 'SIGMOID'.
        :param network_type: when equal to "S2S" the symbolic duration input
            ``self.d`` (int vector) is created. Encoder layers read it, so
            they require "S2S".
        :param dropout_rate: probability of dropout, a float number between 0
            and 1.
        :param loss_function: 'CCE', 'Hinge' or 'MMSE'. For any other value
            ``self.finetune_cost`` and ``self.errors`` are left undefined.
        """

        logger = logging.getLogger("DNN initialization")

        self.n_in = int(n_in)
        self.n_out = int(n_out)

        self.n_layers = len(hidden_layer_size)

        self.dropout_rate = dropout_rate
        self.loss_function = loss_function
        self.is_train = T.iscalar('is_train')

        assert len(hidden_layer_size) == len(hidden_layer_type)

        self.x = T.matrix('x')
        self.y = T.matrix('y')

        if network_type == "S2S":
            self.d = T.ivector('d')

        self.L1_reg = L1_reg
        self.L2_reg = L2_reg

        self.rnn_layers = []
        self.params = []
        self.delta_params = []

        rng = np.random.RandomState(123)

        Encoder_variants = ['RNNE', 'LSTME', 'BLSTME', 'SLSTME', 'TANHE']
        Decoder_variants = ['RNND', 'LSTMD', 'SLSTMD']
        # ``range`` behaves the same as ``xrange`` here and also exists on
        # Python 3.
        for i in range(self.n_layers):
            layer_type = hidden_layer_type[i]
            layer_size = hidden_layer_size[i]

            if i == 0:
                input_size = n_in
                layer_input = self.x
            else:
                input_size = hidden_layer_size[i - 1]
                layer_input = self.rnn_layers[i - 1].output
                # Bidirectional layers emit twice their nominal size.
                if hidden_layer_type[i - 1] in ('BSLSTM', 'BLSTM'):
                    input_size = hidden_layer_size[i - 1] * 2

            # Only a real previous layer can be an encoder. Without the
            # ``i > 0`` guard, index -1 would wrap around to the LAST layer
            # and wrongly encode the raw network input.
            if i > 0 and hidden_layer_type[i - 1] in Encoder_variants:
                # Index of the last row of each segment: cumulative sum of
                # the durations, minus one.
                dur_input = T.extra_ops.cumsum(self.d) - 1
                layer_input = layer_input[dur_input]
                seq2seq_model = DistributedSequenceEncoder(
                    rng, layer_input, self.d)
                layer_input = seq2seq_model.encoded_output

            # (an if/elif chain keeps the layer class names lazily resolved:
            # only the class of a requested layer type has to be available)
            if layer_type in ('SLSTM', 'SLSTME'):
                hidden_layer = SimplifiedLstm(rng,
                                              layer_input,
                                              input_size,
                                              layer_size,
                                              p=self.dropout_rate,
                                              training=self.is_train)
            elif layer_type == 'SGRU':
                hidden_layer = SimplifiedGRU(rng,
                                             layer_input,
                                             input_size,
                                             layer_size,
                                             p=self.dropout_rate,
                                             training=self.is_train)
            elif layer_type == 'GRU':
                hidden_layer = GatedRecurrentUnit(rng,
                                                  layer_input,
                                                  input_size,
                                                  layer_size,
                                                  p=self.dropout_rate,
                                                  training=self.is_train)
            elif layer_type == 'LSTM_NFG':
                hidden_layer = LstmNFG(rng,
                                       layer_input,
                                       input_size,
                                       layer_size,
                                       p=self.dropout_rate,
                                       training=self.is_train)
            elif layer_type == 'LSTM_NOG':
                hidden_layer = LstmNOG(rng,
                                       layer_input,
                                       input_size,
                                       layer_size,
                                       p=self.dropout_rate,
                                       training=self.is_train)
            elif layer_type == 'LSTM_NIG':
                hidden_layer = LstmNIG(rng,
                                       layer_input,
                                       input_size,
                                       layer_size,
                                       p=self.dropout_rate,
                                       training=self.is_train)
            elif layer_type == 'LSTM_NPH':
                hidden_layer = LstmNoPeepholes(rng,
                                               layer_input,
                                               input_size,
                                               layer_size,
                                               p=self.dropout_rate,
                                               training=self.is_train)
            elif layer_type in ('LSTM', 'LSTME'):
                hidden_layer = VanillaLstm(rng,
                                           layer_input,
                                           input_size,
                                           layer_size,
                                           p=self.dropout_rate,
                                           training=self.is_train)
            elif layer_type == 'LSTMD':
                hidden_layer = VanillaLstmDecoder(rng,
                                                  layer_input,
                                                  input_size,
                                                  layer_size,
                                                  self.n_out,
                                                  p=self.dropout_rate,
                                                  training=self.is_train)
            elif layer_type == 'BSLSTM':
                hidden_layer = BidirectionSLstm(rng,
                                                layer_input,
                                                input_size,
                                                layer_size,
                                                layer_size,
                                                p=self.dropout_rate,
                                                training=self.is_train)
            elif layer_type == 'BLSTM':
                hidden_layer = BidirectionLstm(rng,
                                               layer_input,
                                               input_size,
                                               layer_size,
                                               layer_size,
                                               p=self.dropout_rate,
                                               training=self.is_train)
            elif layer_type in ('RNN', 'RNNE'):
                # 'RNNE' is listed in Encoder_variants; the original test
                # compared against 'RNN' twice, so 'RNNE' was rejected below.
                hidden_layer = VanillaRNN(rng,
                                          layer_input,
                                          input_size,
                                          layer_size,
                                          p=self.dropout_rate,
                                          training=self.is_train)
            elif layer_type == 'RNND':
                hidden_layer = VanillaRNNDecoder(rng,
                                                 layer_input,
                                                 self.y,
                                                 input_size,
                                                 layer_size,
                                                 self.n_out,
                                                 p=self.dropout_rate,
                                                 training=self.is_train)
            elif layer_type == 'TANH':
                hidden_layer = SigmoidLayer(rng,
                                            layer_input,
                                            input_size,
                                            layer_size,
                                            activation=T.tanh,
                                            p=self.dropout_rate,
                                            training=self.is_train)
            elif layer_type == 'SIGMOID':
                hidden_layer = SigmoidLayer(rng,
                                            layer_input,
                                            input_size,
                                            layer_size,
                                            activation=T.nnet.sigmoid,
                                            p=self.dropout_rate,
                                            training=self.is_train)
            elif layer_type == 'SOFTMAX':
                # NOTE(review): this layer is sized layer_size + 1, while the
                # next layer's input_size is taken as layer_size -- confirm
                # that the mismatch is intended.
                hidden_layer = SoftmaxLayer(rng, layer_input, input_size,
                                            layer_size + 1)
            else:
                logger.critical(
                    "This hidden layer type: %s is not supported right now! \n Please use one of the following: SLSTM, BSLSTM, TANH, SIGMOID\n"
                    % (layer_type))
                sys.exit(1)

            self.rnn_layers.append(hidden_layer)
            self.params.extend(hidden_layer.params)

        # Width of the last hidden layer's output, fed to the output layer.
        input_size = hidden_layer_size[-1]
        if hidden_layer_type[-1] in ('BSLSTM', 'BLSTM'):
            input_size = hidden_layer_size[-1] * 2

        if hidden_layer_type[-1] in Decoder_variants:
            # The decoder layer was given n_out and doubles as the output
            # layer. Its parameters were already collected in the loop, so
            # they must not be added to self.params a second time.
            self.final_layer = self.rnn_layers[-1]
        else:
            if output_type == 'LINEAR':
                self.final_layer = LinearLayer(rng, self.rnn_layers[-1].output,
                                               input_size, self.n_out)
            elif output_type == 'SOFTMAX':
                self.final_layer = SoftmaxLayer(rng,
                                                self.rnn_layers[-1].output,
                                                input_size, self.n_out)
            elif output_type == 'SIGMOID':
                self.final_layer = SigmoidLayer(rng,
                                                self.rnn_layers[-1].output,
                                                input_size,
                                                self.n_out,
                                                activation=T.nnet.sigmoid)
            else:
                logger.critical(
                    "This output layer type: %s is not supported right now! \n Please use one of the following: LINEAR, BSLSTM\n"
                    % (output_type))
                sys.exit(1)

            self.params.extend(self.final_layer.params)

        # One zero-initialised shared variable per parameter, same shape.
        self.updates = {}
        for param in self.params:
            self.updates[param] = theano.shared(
                value=np.zeros(param.get_value(borrow=True).shape,
                               dtype=theano.config.floatX),
                name='updates')

        if self.loss_function == 'CCE':
            self.finetune_cost = self.categorical_crossentropy_loss(
                self.final_layer.output, self.y)
            self.errors = self.categorical_crossentropy_loss(
                self.final_layer.output, self.y)
        elif self.loss_function == 'Hinge':
            self.finetune_cost = self.multiclass_hinge_loss(
                self.final_layer.output, self.y)
            self.errors = self.multiclass_hinge_loss(self.final_layer.output,
                                                     self.y)
        elif self.loss_function == 'MMSE':
            self.finetune_cost = T.mean(
                T.sum((self.final_layer.output - self.y)**2, axis=1))
            self.errors = T.mean(
                T.sum((self.final_layer.output - self.y)**2, axis=1))