def __init__(self, input, n_in, n_out, prefix='hidden', W=None, b=None,
             activation=T.tanh):
    """Fully-connected layer: output = activation(T.dot(input, W) + b).

    Parameters
    ----------
    input : theano tensor whose last axis has size n_in
    n_in, n_out : int
        Fan-in / fan-out of the layer.
    prefix : str
        Name prefix for the shared parameters (via _p).
    W, b : theano shared variables or None
        Pre-built parameters; freshly initialized when None.
    activation : elementwise theano op, T.nnet.softmax, or None
        None yields a purely linear output.
    """
    self.input = input
    if W is None:
        # Glorot/Bengio uniform initialization in +-sqrt(6/(fan_in+fan_out)).
        # (Was written with mixed `numpy`/`np` aliases; normalized to `numpy`.)
        W_values = numpy.asarray(
            numpy.random.uniform(
                low=-numpy.sqrt(6. / (n_in + n_out)),
                high=numpy.sqrt(6. / (n_in + n_out)),
                size=(n_in, n_out)),
            dtype=theano.config.floatX)
        # Sigmoid units work better with a 4x larger initial range.
        if activation == T.nnet.sigmoid:
            W_values *= 4
        W = theano.shared(value=W_values, name=_p(prefix, 'W'), borrow=True)
    if b is None:
        b_values = numpy.zeros((n_out,), dtype=theano.config.floatX)
        b = theano.shared(value=b_values, name=_p(prefix, 'b'), borrow=True)
    self.W = W
    self.b = b
    self.pre_activation = T.dot(input, self.W) + self.b
    if activation is None:
        self.output = self.pre_activation
    elif activation == T.nnet.softmax:
        # softmax operates on matrices: collapse leading axes, apply, restore.
        shape = self.pre_activation.shape
        tmp = self.pre_activation.reshape((T.prod(shape[:-1]), shape[-1]))
        self.output = T.nnet.softmax(tmp).reshape(shape)
    else:
        self.output = activation(self.pre_activation)
    self.params = {_p(prefix, 'W'): W, _p(prefix, 'b'): b}
def __init__(self, input, n_in, n_out, prefix='hidden', W=None, b=None,
             activation=T.tanh):
    """Dense layer computing activation(input . W + b).

    Creates W (Glorot-uniform) and b (zeros) as theano shared variables
    unless they are supplied; exposes them in self.params keyed by prefix.
    """
    self.input = input
    if W is None:
        bound = numpy.sqrt(6. / (n_in + n_out))
        w_init = np.random.uniform(low=-bound, high=bound,
                                   size=(n_in, n_out))
        w_init = numpy.asarray(w_init, dtype=theano.config.floatX)
        # Sigmoid layers use a 4x wider initial range.
        if activation == theano.tensor.nnet.sigmoid:
            w_init *= 4
        W = theano.shared(value=w_init, name=_p(prefix, 'W'), borrow=True)
    if b is None:
        b_init = numpy.zeros((n_out, ), dtype=theano.config.floatX)
        b = theano.shared(value=b_init, name=_p(prefix, 'b'), borrow=True)
    self.W, self.b = W, b
    self.pre_activation = T.dot(input, self.W) + self.b
    if activation is None:
        self.output = self.pre_activation
    elif activation == T.nnet.softmax:
        # Flatten every axis but the last so softmax sees a matrix.
        out_shape = self.pre_activation.shape
        flat = self.pre_activation.reshape(
            (T.prod(out_shape[:-1]), out_shape[-1]))
        self.output = T.nnet.softmax(flat).reshape(out_shape)
    else:
        self.output = activation(self.pre_activation)
    self.params = {_p(prefix, 'W'): W, _p(prefix, 'b'): b}
def __init__(self, x, x_mask, n_samples_const, n_features, n_hidden_units,
             prefix='lstm', forget=True):
    """Masked LSTM layer scanned over the time axis.

    Parameters
    ----------
    x : theano tensor reshaped internally to (n_steps, n_samples, n_features)
    x_mask : (n_samples, n_steps) binary mask for variable-length sequences
    n_samples_const : int
        Concrete batch size used to allocate the initial hidden/cell states.
    n_features, n_hidden_units : int
        Input and hidden dimensionality.
    prefix : str
        Parameter-name prefix (via _p).
    forget : bool
        When False, the forget gate is tied to (1 - input gate).
    """
    # shape check
    n_samples, n_steps = x_mask.shape
    # Time-major layout for theano.scan: (n_steps, n_samples, n_features).
    self.x = x.reshape((n_samples, n_features, n_steps)).dimshuffle(2, 0, 1)
    self.x_mask = x_mask.T

    # init the parameters: one slab per gate (input, output, cell,
    # plus forget when enabled -- bool adds as 0/1).
    self.params = dict()
    num_activations = 3 + forget
    W = self.stacked_ortho_wts(n_features, n_hidden_units, num_activations)
    self.params[_p(prefix, 'W')] = shared(W)
    U = self.stacked_ortho_wts(n_hidden_units, n_hidden_units,
                               num_activations)
    self.params[_p(prefix, 'U')] = shared(U)
    # FIX: cast the bias to floatX.  np.zeros defaults to float64, which
    # upcasts (or breaks) the graph when theano.config.floatX is float32;
    # the initial states below were already cast, the bias was not.
    b = np.zeros(num_activations * n_hidden_units,
                 dtype=theano.config.floatX)
    self.params[_p(prefix, 'b')] = shared(b)

    out0 = shared(np.zeros((n_samples_const, n_hidden_units))
                  .astype(theano.config.floatX))
    cell0 = shared(np.zeros((n_samples_const, n_hidden_units))
                   .astype(theano.config.floatX))

    def _step(m_, x_, out_tmp, cell_tmp):
        # One LSTM recurrence step; m_ masks out padded positions.
        preact = T.dot(out_tmp, self.params[_p(prefix, 'U')]) + x_
        inn_gate = T.nnet.sigmoid(preact[:, :n_hidden_units])
        out_gate = T.nnet.sigmoid(
            preact[:, n_hidden_units:2 * n_hidden_units])
        fgt_gate = T.nnet.sigmoid(
            preact[:, 2 * n_hidden_units:3 * n_hidden_units]) \
            if forget else 1 - inn_gate
        # pre activation, tanh
        cell_val = T.tanh(preact[:, -n_hidden_units:])
        cell_val = fgt_gate * cell_tmp + inn_gate * cell_val
        # Masked positions carry the previous state forward unchanged.
        cell_val = m_[:, None] * cell_val + (1. - m_)[:, None] * cell_tmp
        # after activation, linear
        out = out_gate * cell_val
        out = m_[:, None] * out + (1. - m_)[:, None] * out_tmp
        return out, cell_val

    self.param_w = [self.params[_p(prefix, 'W')]]
    self.param_b = [self.params[_p(prefix, 'b')]]
    # Precompute the input projection for every time step at once.
    self.x_prime = (T.dot(self.x, self.params[_p(prefix, 'W')])
                    + self.params[_p(prefix, 'b')][None, None, :])
    rval, updates = theano.scan(_step,
                                sequences=[self.x_mask, self.x_prime],
                                outputs_info=[out0, cell0],
                                name=_p(prefix, '_layers'))
    self.output = rval[0]
    self.n_out = n_hidden_units
def _step(m_, x_, out_tmp, cell_tmp):
    """Advance the LSTM one time step, respecting the sequence mask m_."""
    nh = n_hidden_units
    preact = x_ + T.dot(out_tmp, self.params[_p(prefix, 'U')])
    inn_gate = T.nnet.sigmoid(preact[:, :nh])
    out_gate = T.nnet.sigmoid(preact[:, nh:2 * nh])
    if forget:
        fgt_gate = T.nnet.sigmoid(preact[:, 2 * nh:3 * nh])
    else:
        # Coupled gates: forget exactly what is not newly written.
        fgt_gate = 1 - inn_gate
    candidate = T.tanh(preact[:, -nh:])
    new_cell = fgt_gate * cell_tmp + inn_gate * candidate
    keep = (1. - m_)[:, None]
    # Rows with mask 0 retain their previous cell and output states.
    new_cell = m_[:, None] * new_cell + keep * cell_tmp
    new_out = out_gate * new_cell
    new_out = m_[:, None] * new_out + keep * out_tmp
    return new_out, new_cell
def _step(m_, x_, out_tmp, cell_tmp):
    # One LSTM recurrence step for theano.scan.
    # m_: per-sample mask for this step; x_: precomputed input projection;
    # out_tmp / cell_tmp: hidden and cell state from the previous step.
    preact = T.dot(out_tmp, self.params[_p(prefix, 'U')]) + x_
    inn_gate = T.nnet.sigmoid(preact[:, :n_hidden_units])
    out_gate = T.nnet.sigmoid(preact[:, n_hidden_units:2 * n_hidden_units])
    # Without a dedicated forget gate, tie it to (1 - input gate).
    fgt_gate = T.nnet.sigmoid(
        preact[:, 2 * n_hidden_units:3 * n_hidden_units]) if forget else 1 - inn_gate
    # pre activation, tanh
    cell_val = T.tanh(preact[:, -n_hidden_units:])
    cell_val = fgt_gate * cell_tmp + inn_gate * cell_val
    # Masked (padded) positions carry the previous state forward unchanged.
    cell_val = m_[:, None] * cell_val + (1. - m_)[:, None] * cell_tmp
    # after activation, linear
    out = out_gate * cell_val
    out = m_[:, None] * out + (1. - m_)[:, None] * out_tmp
    return out, cell_val
def __init__(self, x, x_mask, n_samples_const, n_features, n_hidden_units,
             prefix='lstm', forget=True):
    """Masked LSTM layer; scans a single-step recurrence over time.

    The input x is rearranged to time-major layout and projected through
    W + b once up front; theano.scan then applies the recurrence with
    initial hidden/cell states of zeros sized by n_samples_const.
    """
    n_samples, n_steps = x_mask.shape
    # (n_samples, n_features, n_steps) -> (n_steps, n_samples, n_features).
    reshaped = x.reshape((n_samples, n_features, n_steps))
    self.x = reshaped.dimshuffle(2, 0, 1)
    self.x_mask = x_mask.T

    self.params = dict()
    # Gate slabs: input, output, cell, plus forget when enabled.
    num_activations = 3 + forget
    w_key, u_key, b_key = (_p(prefix, s) for s in ('W', 'U', 'b'))
    self.params[w_key] = shared(
        self.stacked_ortho_wts(n_features, n_hidden_units, num_activations))
    self.params[u_key] = shared(
        self.stacked_ortho_wts(n_hidden_units, n_hidden_units,
                               num_activations))
    self.params[b_key] = shared(np.zeros(num_activations * n_hidden_units))

    def _zero_state():
        # Fresh all-zeros state of shape (batch, hidden) in floatX.
        return shared(np.zeros((n_samples_const, n_hidden_units))
                      .astype(theano.config.floatX))

    out0 = _zero_state()
    cell0 = _zero_state()

    def _step(m_, x_, out_tmp, cell_tmp):
        # Single recurrence step; masked rows keep their previous state.
        nh = n_hidden_units
        preact = T.dot(out_tmp, self.params[u_key]) + x_
        inn_gate = T.nnet.sigmoid(preact[:, :nh])
        out_gate = T.nnet.sigmoid(preact[:, nh:2 * nh])
        if forget:
            fgt_gate = T.nnet.sigmoid(preact[:, 2 * nh:3 * nh])
        else:
            fgt_gate = 1 - inn_gate
        cell_val = fgt_gate * cell_tmp + inn_gate * T.tanh(preact[:, -nh:])
        keep = (1. - m_)[:, None]
        cell_val = m_[:, None] * cell_val + keep * cell_tmp
        out = out_gate * cell_val
        out = m_[:, None] * out + keep * out_tmp
        return out, cell_val

    self.param_w = [self.params[w_key]]
    self.param_b = [self.params[b_key]]
    # Project the whole sequence through W once; bias broadcasts over
    # the time and batch axes.
    self.x_prime = T.dot(self.x, self.params[w_key]) \
        + self.params[b_key][None, None, :]
    rval, updates = theano.scan(_step,
                                sequences=[self.x_mask, self.x_prime],
                                outputs_info=[out0, cell0],
                                name=_p(prefix, '_layers'))
    self.output = rval[0]
    self.n_out = n_hidden_units