def __init__(self, rng, input, mask, n_in, n_hiddens, parameters=None,
             output_type="last", prefix="lstms", truncate_gradient=-1,
             srng=None, dropout=0.0, use_dropout_regularization=False,
             stabilize_activations=None):
    self.output_type = output_type
    self.dropout = dropout
    self.truncate_gradient = truncate_gradient
    self.n_layers = len(n_hiddens)
    self.layers = []
    self.input = input
    self.mask = mask
    self.n_in = n_in
    self.prefix = prefix
    self.stabilize_activations = stabilize_activations

    # Reverse and copy because we want to pop off the parameters
    # (W, U, b) for one layer at a time, in order.
    if parameters is not None:
        cur_parameters = list(parameters)[::-1]
    else:
        cur_parameters = None
    self.parameters = []
    cur_in = n_in
    self.l2 = 0.
    self.norm_stabilizer = 0.
    for layer_id, n_hidden in enumerate(n_hiddens):
        # Only the top layer uses the requested output type; lower layers
        # must emit the full sequence so the next layer can consume it.
        cur_output_type = output_type if layer_id == self.n_layers - 1 else "all"
        if cur_parameters is None:
            W = None
            U = None
            b = None
        else:
            W = cur_parameters.pop()
            U = cur_parameters.pop()
            b = cur_parameters.pop()
        if self.layers:
            # Layers after the first read the previous layer's (optionally
            # dropped-out) output.
            if use_dropout_regularization:
                input = self.layers[-1].dropout_output
            else:
                input = self.layers[-1].output
        self.layers.append(
            BatchLSTM(rng, input, mask, cur_in, n_hidden, W=W, U=U, b=b,
                      output_type=cur_output_type,
                      prefix="%s_%d" % (self.prefix, layer_id),
                      truncate_gradient=self.truncate_gradient,
                      stabilize_activations=self.stabilize_activations))
        if self.stabilize_activations is not None:
            self.norm_stabilizer += self.layers[-1].norm_stabilizer
        self.parameters.append(self.layers[-1].W)
        self.parameters.append(self.layers[-1].U)
        self.parameters.append(self.layers[-1].b)
        self.l2 += self.layers[-1].l2
        cur_in = n_hidden
    self.output = self.layers[-1].output
    if srng is not None and dropout is not None and dropout > 0.0:
        self.dropout_output = theano_utils.apply_dropout(
            srng, self.output, p=dropout)
    else:
        self.dropout_output = self.output
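
# A minimal usage sketch for the constructor above. The class name follows the
# BatchMultiLayerLSTM call in the convolutional wrapper below; the tensor
# layout (time-major input with a matching mask) and the concrete dimensions
# are illustrative assumptions, not fixed by this file.
#
#     import numpy
#     import theano.tensor as T
#     from theano.sandbox.rng_mrg import MRG_RandomStreams
#
#     rng = numpy.random.RandomState(1234)
#     srng = MRG_RandomStreams(rng.randint(2 ** 30))
#     x = T.tensor3("x")       # (n_timesteps, n_samples, n_in)
#     mask = T.matrix("mask")  # (n_timesteps, n_samples), 1.0 where valid
#     lstms = BatchMultiLayerLSTM(rng, x, mask, n_in=39, n_hiddens=[128, 128],
#                                 srng=srng, dropout=0.5, output_type="last")
#     # lstms.output, lstms.parameters and lstms.l2 can then be used to build
#     # the cost and the gradient updates.
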
def __init__(self, rng, input, n_in, n_hiddens, parameters=None,
             output_type="last", prefix="lstms", truncate_gradient=-1,
             srng=None, dropout=0.0):
    self.n_layers = len(n_hiddens)
    self.layers = []
    self.input = input
    self.n_in = n_in
    self.prefix = prefix
    self.dropout = dropout

    # Reverse and copy because we want to pop off the parameters
    if parameters is not None:
        cur_parameters = list(parameters)[::-1]
    else:
        cur_parameters = None
    self.parameters = []
    cur_in = n_in
    self.l2 = 0.
    for layer_id, n_hidden in enumerate(n_hiddens):
        cur_output_type = output_type if layer_id == self.n_layers - 1 else "all"
        if cur_parameters is None:
            W = None
            U = None
            b = None
        else:
            W = cur_parameters.pop()
            U = cur_parameters.pop()
            b = cur_parameters.pop()
        if self.layers:
            input = self.layers[-1].output
        self.layers.append(
            LSTM(rng, input, cur_in, n_hidden, W=W, U=U, b=b,
                 output_type=cur_output_type,
                 prefix="%s_%d" % (self.prefix, layer_id),
                 truncate_gradient=truncate_gradient))
        self.parameters.append(self.layers[-1].W)
        self.parameters.append(self.layers[-1].U)
        self.parameters.append(self.layers[-1].b)
        self.l2 += self.layers[-1].l2
        cur_in = n_hidden
    self.output = self.layers[-1].output
    if srng is not None and dropout is not None and dropout > 0.0:
        self.dropout_output = theano_utils.apply_dropout(
            srng, self.output, p=dropout)
    else:
        self.dropout_output = self.output
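
# A minimal sketch of parameter sharing with the single-sequence stack above.
# Because the constructor pops (W, U, b) per layer from `parameters`, a second
# stack can be tied to the weights of an existing one (assuming LSTM accepts
# pre-built shared variables the same way BatchLSTM below does). The enclosing
# class name MultiLayerLSTM, the matrix input layout and the dimensions are
# assumptions for illustration.
#
#     x_a = T.matrix("x_a")    # (n_timesteps, n_in), a single sequence
#     x_b = T.matrix("x_b")
#     stack_a = MultiLayerLSTM(rng, x_a, n_in=39, n_hiddens=[128, 64])
#     stack_b = MultiLayerLSTM(rng, x_b, n_in=39, n_hiddens=[128, 64],
#                              parameters=stack_a.parameters)
#     # stack_b reuses stack_a's W, U and b, e.g. for a siamese setup.
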
def __init__(self, rng, input, mask, n_in, n_hidden, W=None, U=None, b=None,
             output_type="last", prefix="lstm", truncate_gradient=-1,
             srng=None, dropout=0.0, stabilize_activations=None):
    self.truncate_gradient = truncate_gradient
    self.output_type = output_type
    self.input = input
    self.dropout = dropout
    self.mask = mask
    self.n_hidden = n_hidden
    self.n_in = n_in
    self.prefix = prefix
    self.stabilize_activations = stabilize_activations

    if W is None or U is None or b is None:
        # Initialize the input-to-gate and hidden-to-gate weights jointly from
        # slices of orthogonal matrices; the four blocks along the last axis
        # correspond to the four LSTM gates, so W is (n_in, 4 * n_hidden) and
        # U is (n_hidden, 4 * n_hidden).
        WU_values = numpy.concatenate(
            [ortho_weight(self.n_hidden + self.n_in)[:, :self.n_hidden],
             ortho_weight(self.n_hidden + self.n_in)[:, :self.n_hidden],
             ortho_weight(self.n_hidden + self.n_in)[:, :self.n_hidden],
             ortho_weight(self.n_hidden + self.n_in)[:, :self.n_hidden],
             ], axis=1)
        W_values = WU_values[:self.n_in]
        U_values = WU_values[self.n_in:]
        W = theano.shared(value=W_values, name="%s_W" % prefix, borrow=True)
        U = theano.shared(value=U_values, name="%s_U" % prefix, borrow=True)
        b_values = numpy.zeros(4 * self.n_hidden, dtype=THEANOTYPE)
        b = theano.shared(value=b_values, name="%s_b" % prefix, borrow=True)
    self.W = W
    self.U = U
    self.b = b
    self.parameters = [self.W, self.U, self.b]
    self.l2 = (self.W**2).sum() + (self.U**2).sum()
    self.set_output()
    if srng is not None and dropout is not None and dropout > 0.0:
        self.dropout_output = theano_utils.apply_dropout(
            srng, self.output, p=dropout)
    else:
        self.dropout_output = self.output
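
# Shape expectations implied by the initialization above, useful when passing
# pre-trained parameters in explicitly. A sketch only: the concrete sizes are
# examples, `x` and `mask` are the symbolic inputs from the sketch further up,
# and the class name BatchLSTM follows the call in the multi-layer stack above.
#
#     n_in, n_hidden = 39, 128
#     W_values = rng.randn(n_in, 4 * n_hidden).astype(THEANOTYPE)
#     U_values = rng.randn(n_hidden, 4 * n_hidden).astype(THEANOTYPE)
#     b_values = numpy.zeros(4 * n_hidden, dtype=THEANOTYPE)
#     lstm = BatchLSTM(
#         rng, x, mask, n_in, n_hidden,
#         W=theano.shared(W_values, borrow=True),
#         U=theano.shared(U_values, borrow=True),
#         b=theano.shared(b_values, borrow=True),
#         output_type="all")
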
def __init__(self, rng, input, mask, input_shape, filter_shape, n_hiddens,
             n_outputs=None, V=None, parameters=None, U=None, b=None,
             output_type="last", prefix="convlstms", truncate_gradient=-1,
             srng=None, dropout=0.0, out_W=None, out_b=None,
             use_dropout_regularization=False, stabilize_activations=None):
    """Convolutional front-end feeding a multi-layer batch LSTM and a linear
    output layer.

    Initialization of the hidden state is just done at the zero level.

    Parameters
    ----------
    V : convolutional filter weights (created from `filter_shape` when None)
    """
    self.truncate_gradient = truncate_gradient
    self.output_type = output_type
    self.dropout = dropout
    self.use_dropout_regularization = use_dropout_regularization
    self.srng = srng
    self.input = input
    self.mask = mask
    self.input_shape = input_shape
    self.filter_shape = filter_shape
    self.n_in = filter_shape[0]
    self.n_hiddens = n_hiddens
    self.prefix = prefix
    self.stabilize_activations = stabilize_activations
    if n_outputs is None:
        self.n_outputs = self.n_hiddens[-1]
    else:
        self.n_outputs = n_outputs

    if V is None:
        # Glorot-style uniform initialization for the convolutional filters.
        n_units_in = numpy.prod(filter_shape[1:])
        n_units_out = filter_shape[0] * numpy.prod(filter_shape[2:])
        V_values = numpy.asarray(rng.uniform(
            low=-numpy.sqrt(6. / (n_units_in + n_units_out)),
            high=numpy.sqrt(6. / (n_units_in + n_units_out)),
            size=filter_shape), dtype=theano.config.floatX)
        V = theano.shared(value=V_values.astype(THEANOTYPE),
                          name="%s_V" % self.prefix, borrow=True)
    self.V = V

    # Convolve the reshaped input, keep only the first position along the
    # filter-width axis, and reorder the axes to (time, batch, n_filters) as
    # expected by the batch LSTM stack.
    self.conv_out = nnet.conv.conv2d(
        input=self.input.reshape(self.input_shape),
        filters=self.V,
        filter_shape=self.filter_shape,
        image_shape=self.input_shape
        )[:, :, :, 0].swapaxes(1, 2).swapaxes(0, 1)
    if self.use_dropout_regularization:
        self.lstm_input = theano_utils.apply_dropout(
            srng, self.conv_out, p=self.dropout)
    else:
        self.lstm_input = self.conv_out
    # The mask is truncated to the convolution's output length.
    self.lstms = BatchMultiLayerLSTM(
        rng, self.lstm_input, self.mask[:self.conv_out.shape[0]], self.n_in,
        self.n_hiddens, parameters=parameters, output_type=self.output_type,
        prefix="%s_lstms" % self.prefix,
        truncate_gradient=self.truncate_gradient, srng=self.srng,
        dropout=self.dropout,
        use_dropout_regularization=self.use_dropout_regularization,
        stabilize_activations=self.stabilize_activations)
    if self.stabilize_activations is not None:
        self.norm_stabilizer = self.lstms.norm_stabilizer
    else:
        self.norm_stabilizer = 0.
    self.parameters = [self.V] + self.lstms.parameters
    self.l2 = self.lstms.l2 + (self.V**2).sum()
    self.linear_layer = mlp.HiddenLayer(
        rng=rng,
        input=self.lstms.output,
        d_in=self.n_hiddens[-1],
        d_out=self.n_outputs,
        activation=None,
        W=out_W,
        b=out_b)
    self.out_W = self.linear_layer.W
    self.out_b = self.linear_layer.b
    self.l2 = self.l2 + (self.out_W**2).sum()
    self.parameters += [self.out_W, self.out_b]
    self.output = self.linear_layer.output
    self.dropout_output = self.output
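
# A minimal usage sketch for the convolutional LSTM wrapper above. The shapes
# are assumptions inferred from the conv2d call: the filter spans the full
# feature axis so only width index 0 of the convolution is kept, the remaining
# "rows" axis becomes the LSTM time axis, and filter_shape[0] (the number of
# filters) becomes the LSTM input size. The concrete numbers and the class
# name ConvBatchMultiLayerLSTM are illustrative only.
#
#     n_samples, n_frames, n_features = 16, 200, 39
#     filter_shape = (64, 1, 5, n_features)           # 64 filters over 5 frames
#     input_shape = (n_samples, 1, n_frames, n_features)
#     x = T.tensor3("x")       # reshaped internally to input_shape
#     mask = T.matrix("mask")  # (n_frames, n_samples); truncated internally to
#                              # the (n_frames - 5 + 1) convolution outputs
#     net = ConvBatchMultiLayerLSTM(rng, x, mask, input_shape, filter_shape,
#                                   n_hiddens=[128, 128], n_outputs=100,
#                                   srng=srng, dropout=0.5)
#     # net.output is the linear read-out of the final LSTM state;
#     # net.parameters collects V, the LSTM weights and the output layer.
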