Example #1
    def __init__(self, rng, input, mask, n_in, n_hiddens, parameters=None,
                 output_type="last", prefix="lstms", truncate_gradient=-1,
                 srng=None, dropout=0.0, use_dropout_regularization=False,
                 stabilize_activations=None):
        self.output_type = output_type
        self.dropout = dropout
        self.truncate_gradient = truncate_gradient
        self.n_layers = len(n_hiddens)
        self.layers = []
        self.input = input
        self.mask = mask
        self.n_in = n_in
        self.prefix = prefix
        self.stabilize_activations = stabilize_activations
        # reverse and copy because we want to pop off the parameters
        if parameters is not None:
            cur_parameters = list(parameters)[::-1]
        else:
            cur_parameters = None
            
        self.parameters = []
        cur_in = n_in
        self.l2 = 0.
        self.norm_stabilizer = 0.
        for layer_id, n_hidden in enumerate(n_hiddens):
            cur_output_type = output_type if layer_id == self.n_layers-1 else "all"
            if cur_parameters is None:
                W = None
                U = None
                b = None
            else:
                W = cur_parameters.pop()
                U = cur_parameters.pop()
                b = cur_parameters.pop()

            if self.layers:
                if use_dropout_regularization:
                    input = self.layers[-1].dropout_output
                else:
                    input = self.layers[-1].output
                
            self.layers.append(
                BatchLSTM(rng, input, mask, cur_in, n_hidden, W=W, U=U, b=b,
                          output_type=cur_output_type,
                          prefix="%s_%d" % (self.prefix, layer_id),
                          truncate_gradient=self.truncate_gradient,
                          stabilize_activations=self.stabilize_activations))
            if self.stabilize_activations is not None:
                self.norm_stabilizer += self.layers[-1].norm_stabilizer
            self.parameters.append(self.layers[-1].W)
            self.parameters.append(self.layers[-1].U)
            self.parameters.append(self.layers[-1].b)
            self.l2 += self.layers[-1].l2
            cur_in = n_hidden
        self.output = self.layers[-1].output
        if srng is not None and dropout is not None and dropout > 0.0:
            self.dropout_output = theano_utils.apply_dropout(
                srng, self.output, p=dropout)
        else:
            self.dropout_output = self.output
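
The class in Example #1 is the one referenced as BatchMultiLayerLSTM in Example #4, so a usage sketch might look like the following. The class name and the import path are assumptions; the source only shows the constructor.

import numpy
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

# from lstm import BatchMultiLayerLSTM  # hypothetical import path

rng = numpy.random.RandomState(1234)
srng = RandomStreams(rng.randint(2 ** 30))

x = T.tensor3("x")       # (n_timesteps, n_samples, n_in)
mask = T.matrix("mask")  # (n_timesteps, n_samples); 1.0 marks valid steps

# stack two 128-unit BatchLSTM layers; only the top layer emits its last state
stack = BatchMultiLayerLSTM(rng, x, mask, n_in=39, n_hiddens=[128, 128],
                            output_type="last", srng=srng, dropout=0.5,
                            use_dropout_regularization=True)
encode = theano.function([x, mask], stack.output)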
Example #2
    def __init__(self, rng, input, n_in, n_hiddens, parameters=None,
                 output_type="last", prefix="lstms", truncate_gradient=-1,
                 srng=None, dropout=0.0):
        self.n_layers = len(n_hiddens)
        self.layers = []
        self.input = input
        self.n_in = n_in
        self.prefix = prefix
        self.dropout = dropout
        # reverse and copy because we want to pop off the parameters
        if parameters is not None:
            cur_parameters = list(parameters)[::-1]
        else:
            cur_parameters = None
            
        self.parameters = []
        cur_in = n_in
        self.l2 = 0.
        for layer_id, n_hidden in enumerate(n_hiddens):
            cur_output_type = output_type if layer_id == self.n_layers-1 else "all"
            if cur_parameters is None:
                W = None
                U = None
                b = None
            else:
                W = cur_parameters.pop()
                U = cur_parameters.pop()
                b = cur_parameters.pop()

            if self.layers:
                input = self.layers[-1].output
                
            self.layers.append(
                LSTM(rng, input, cur_in, n_hidden, W=W, U=U, b=b, output_type=cur_output_type,
                     prefix="%s_%d" % (self.prefix, layer_id),
                     truncate_gradient=truncate_gradient))
            self.parameters.append(self.layers[-1].W)
            self.parameters.append(self.layers[-1].U)
            self.parameters.append(self.layers[-1].b)
            self.l2 += self.layers[-1].l2
            cur_in = n_hidden

        
        self.output = self.layers[-1].output
        if srng is not None and dropout is not None and dropout > 0.0:
            self.dropout_output = theano_utils.apply_dropout(
                srng, self.output, p=dropout)
        else:
            self.dropout_output = self.output
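
Example #2 mirrors Example #1 without the mask, stacking plain LSTM layers. Because the constructor pops one (W, U, b) triple per layer off the reversed parameters list, passing one stack's parameter list to a second stack reuses the same shared variables, i.e. ties their weights. A sketch; the class name MultiLayerLSTM is an assumption, since only the constructor is shown.

import numpy
import theano.tensor as T

rng = numpy.random.RandomState(1234)

x = T.tensor3("x")  # (n_timesteps, n_samples, n_in)
encoder = MultiLayerLSTM(rng, x, n_in=39, n_hiddens=[100, 100])

# a second stack over another input that shares the encoder's weights
y = T.tensor3("y")
tied = MultiLayerLSTM(rng, y, n_in=39, n_hiddens=[100, 100],
                      parameters=encoder.parameters)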
Example #3
    def __init__(self, rng, input, mask, n_in, n_hidden, W=None, U=None, b=None,
                 output_type="last", prefix="lstm", truncate_gradient=-1,
                 srng=None, dropout=0.0, stabilize_activations=None):
        self.truncate_gradient = truncate_gradient
        self.output_type = output_type
        self.input = input
        self.dropout = dropout
        self.mask = mask
        self.n_hidden = n_hidden
        self.n_in = n_in
        self.prefix = prefix
        self.stabilize_activations = stabilize_activations
        if W is None or U is None or b is None:
            WU_values = numpy.concatenate(
                [ortho_weight(self.n_hidden + self.n_in)[:, :self.n_hidden],
                 ortho_weight(self.n_hidden + self.n_in)[:, :self.n_hidden],
                 ortho_weight(self.n_hidden + self.n_in)[:, :self.n_hidden],
                 ortho_weight(self.n_hidden + self.n_in)[:, :self.n_hidden]],
                axis=1)
            W_values = WU_values[:self.n_in]
            U_values = WU_values[self.n_in:]
            W = theano.shared(value=W_values, name="%s_W" % prefix,
                              borrow=True)
            U = theano.shared(value=U_values, name="%s_U" % prefix,
                              borrow=True)
            b_values = numpy.zeros(4 * self.n_hidden, dtype=THEANOTYPE)
            b = theano.shared(value=b_values, name="%s_b" % prefix,
                              borrow=True)
        self.W = W
        self.U = U
        self.b = b
        self.parameters = [self.W, self.U, self.b]
        self.l2 = (self.W**2).sum() + (self.U**2).sum()

        self.input = input
        self.set_output()
        if srng is not None and dropout is not None and dropout > 0.0:
            self.dropout_output = theano_utils.apply_dropout(
                srng, self.output, p=dropout)
        else:
            self.dropout_output = self.output
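
Example #3 (the BatchLSTM constructed by Example #1) initializes W and U jointly: four orthogonal blocks of shape (n_in + n_hidden, n_hidden) are concatenated into one (n_in + n_hidden, 4 * n_hidden) matrix and split row-wise into the input-to-hidden W and the hidden-to-hidden U. The helper ortho_weight is not shown in the source; the SVD-based version below follows the common Theano LSTM-tutorial recipe and is an assumption, not the project's code.

import numpy

THEANOTYPE = "float32"  # assumed alias for theano.config.floatX

def ortho_weight(ndim):
    # random square matrix -> orthonormal basis via SVD
    W = numpy.random.randn(ndim, ndim)
    u, _, _ = numpy.linalg.svd(W)
    return u.astype(THEANOTYPE)

n_in, n_hidden = 39, 100
WU = numpy.concatenate(
    [ortho_weight(n_in + n_hidden)[:, :n_hidden] for _ in range(4)], axis=1)
W_values = WU[:n_in]   # (n_in, 4 * n_hidden)
U_values = WU[n_in:]   # (n_hidden, 4 * n_hidden)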
Example #4
    def __init__(self, rng, input, mask, input_shape, filter_shape,
                 n_hiddens, n_outputs=None, V=None, parameters=None,
                 U=None, b=None, output_type="last", prefix="convlstms",
                 truncate_gradient=-1, srng=None, dropout=0.0,
                 out_W=None, out_b=None, use_dropout_regularization=False,
                 stabilize_activations=None):
        """
        initialization for hidden is just done at the zero level

        Parameters:
        -----------
        V:
           Convolutional layer
        """
        self.truncate_gradient = truncate_gradient
        self.output_type = output_type
        self.dropout = dropout
        self.use_dropout_regularization = use_dropout_regularization
        self.srng = srng
        self.input = input
        self.mask = mask
        self.input_shape = input_shape
        self.filter_shape = filter_shape
        self.n_in = filter_shape[0]
        self.n_hiddens = n_hiddens
        self.prefix = prefix
        self.stabilize_activations = stabilize_activations
        if n_outputs is None:
            self.n_outputs = self.n_hiddens[-1]
        else:
            self.n_outputs = n_outputs
        
        if V is None:
            n_units_in = numpy.prod(filter_shape[1:])
            n_units_out = filter_shape[0] * numpy.prod(filter_shape[2:])
            V_values = numpy.asarray(rng.uniform(
                low=-numpy.sqrt(6. / (n_units_in + n_units_out)),
                high=numpy.sqrt(6. / (n_units_in + n_units_out)),
                size=filter_shape), dtype=theano.config.floatX)
            V = theano.shared(value=V_values.astype(THEANOTYPE),
                              name="%s_V" % self.prefix,
                              borrow=True)

        self.V = V
        self.conv_out = nnet.conv.conv2d(
            input=self.input.reshape(self.input_shape),
            filters=self.V,
            filter_shape=self.filter_shape,
            image_shape=self.input_shape
            )[:, :, :, 0].swapaxes(1, 2).swapaxes(0, 1)
        if self.use_dropout_regularization:
            self.lstm_input = theano_utils.apply_dropout(
                srng, self.conv_out, p=self.dropout)
        else:
            self.lstm_input = self.conv_out
        self.lstms = BatchMultiLayerLSTM(
            rng, self.lstm_input, self.mask[:self.conv_out.shape[0]],
            self.n_in, self.n_hiddens,
            parameters=parameters, output_type=self.output_type,
            prefix="%s_lstms" % self.prefix,
            truncate_gradient=self.truncate_gradient,
            srng=self.srng, dropout=self.dropout,
            use_dropout_regularization=self.use_dropout_regularization,
            stabilize_activations=self.stabilize_activations)
        if self.stabilize_activations is not None:
            self.norm_stabilizer = self.lstms.norm_stabilizer
        else:
            self.norm_stabilizer = 0.
        self.parameters = [self.V] + self.lstms.parameters
        self.l2 = self.lstms.l2 + (self.V**2).sum()
        self.linear_layer = mlp.HiddenLayer(
            rng=rng,
            input=self.lstms.output,
            d_in=self.n_hiddens[-1],
            d_out=self.n_outputs,
            activation=None,
            W=out_W,
            b=out_b)
        self.out_W = self.linear_layer.W
        self.out_b = self.linear_layer.b
        self.l2 = self.l2 + (self.out_W**2).sum()
        self.parameters += [self.out_W, self.out_b]
        self.output = self.linear_layer.output
        self.dropout_output = self.output
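
Example #4 runs a 2-D convolution over the input before feeding the stacked LSTMs of Example #1. With filters that span the full frequency axis, the width of conv_out is 1, and the [:, :, :, 0].swapaxes(1, 2).swapaxes(0, 1) chain turns the convolution output into a time-major sequence whose feature dimension equals filter_shape[0]. A shape sketch; the class name BatchConvMultiLayerLSTM and the concrete sizes are assumptions, since only the constructor is shown.

import numpy
import theano.tensor as T

rng = numpy.random.RandomState(1234)
x = T.tensor3("x")       # reshaped to input_shape inside the constructor
mask = T.matrix("mask")  # time-major mask; only the first conv_out.shape[0] rows are used

input_shape = (20, 1, 200, 39)   # (n_samples, n_channels, n_timesteps, n_freq)
filter_shape = (64, 1, 5, 39)    # 64 filters spanning all 39 frequency bins

# conv_out becomes (200 - 5 + 1, 20, 64), i.e. (time, batch, n_in=64)
model = BatchConvMultiLayerLSTM(rng, x, mask, input_shape, filter_shape,
                                n_hiddens=[128, 128], n_outputs=10,
                                output_type="last")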