valid_y = valid_y.reshape((valid_y.shape[0], 1))
test_y = test_y.reshape((test_y.shape[0], 1))

input_dim = train_x.shape[1]

if args.reinit:
    init_batch_size = 16
    init_batch = train_x[:size][-init_batch_size:]
else:
    init_batch = None

if args.model == 'BHN_MLPWN':
    model = MLPWeightNorm_BHN(lbda=lbda,
                              perdatapoint=perdatapoint,
                              srng=RandomStreams(seed=args.seed + 2000),
                              prior=prior,
                              coupling=coupling,
                              n_hiddens=n_hiddens,
                              n_units=n_units,
                              input_dim=input_dim,
                              flow=args.flow,
                              init_batch=init_batch)
elif args.model == 'MCdropout_MLP':
    model = MCdropout_MLP(n_hiddens=n_hiddens,
                          n_units=n_units)
else:
    raise Exception('no model named `{}`'.format(args.model))

va_rec_name = name + '_recs'
tr_rec_name = name + '_recs_train'  # TODO (we're already saving the valid_recs!)
save_path = name + '.params.npy'
import os
import cPickle
import numpy
try:
    import pylab
except ImportError:
    print("pylab isn't available. If you use its functionality, it will crash.")
    print("It can be installed with 'pip install -q Pillow'")

import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

# Don't use a Python long, as this doesn't work on 32-bit computers.
numpy.random.seed(0xbeef)
theano_rng = RandomStreams(seed=numpy.random.randint(1 << 30))
theano.config.warn.subtensor_merge_bug = False

from theano.compat.python2x import OrderedDict

signal_width = 1000


def load_fruitspeech(fruit_list=['apple', 'pineapple']):
    # Check if dataset is in the data directory.
    data_path = os.path.join(os.path.split(__file__)[0], "data")
    if not os.path.exists(data_path):
        os.makedirs(data_path)

    dataset = 'audio.tar.gz'
    data_file = os.path.join(data_path, dataset)
def __init__(self, input, nvis, nhid=None, nvis_dec=None, nhid_dec=None,
             rnd=None, bhid=None, cost_type=CostType.MeanSquared,
             momentum=1, num_pieces=1, L2_reg=-1, L1_reg=-1,
             sparse_initialize=False, nonlinearity=NonLinearity.TANH,
             W=None, b=None, bvis=None, tied_weights=True, reverse=False):

    assert reverse is False
    self.input = input
    self.nvis = nvis
    self.nhid = nhid
    self.bhid = bhid
    self.bvis = bvis
    self.momentum = momentum
    self.nonlinearity = nonlinearity
    self.tied_weights = tied_weights
    self.gparams = None
    self.reverse = reverse
    self.activation = self.get_non_linearity_fn()
    self.catched_params = {}

    if cost_type == CostType.MeanSquared:
        self.cost_type = CostType.MeanSquared
    elif cost_type == CostType.CrossEntropy:
        self.cost_type = CostType.CrossEntropy

    if rnd is None:
        self.rnd = np.random.RandomState(1231)
    else:
        self.rnd = rnd

    self.srng = RandomStreams(seed=1231)

    self.hidden = AEHiddenLayer(input=input,
                                n_in=nvis,
                                n_out=nhid,
                                num_pieces=num_pieces,
                                n_in_dec=nvis_dec,
                                W=W,
                                b=b,
                                n_out_dec=nhid_dec,
                                activation=self.activation,
                                tied_weights=tied_weights,
                                sparse_initialize=sparse_initialize,
                                rng=rnd)

    self.params = self.hidden.params
    self.sparse_initialize = sparse_initialize

    self.L1_reg = L1_reg
    self.L2_reg = L2_reg
    self.L1 = 0
    self.L2 = 0

    if input is not None:
        self.x = input
    else:
        self.x = T.matrix('x_input', dtype=theano.config.floatX)
def __init__(self, numvis, numnote, numfac, numvel, numvelfac, numacc,
             numaccfac, numjerk, seq_len_to_train, seq_len_to_predict,
             output_type='real', coststart=4, vis_corruption_type="zeromask",
             vis_corruption_level=0.0, numpy_rng=None, theano_rng=None):
    self.numvis = numvis
    self.numnote = numnote
    self.numfac = numfac
    self.numvel = numvel
    self.numvelfac = numvelfac
    self.numacc = numacc
    self.numaccfac = numaccfac
    self.numjerk = numjerk
    self.seq_len_to_train = seq_len_to_train
    self.seq_len_to_predict = seq_len_to_predict
    self.output_type = output_type
    self.vis_corruption_type = vis_corruption_type
    self.vis_corruption_level = theano.shared(
        value=numpy.array([vis_corruption_level]), name='vis_corruption_level')
    self.coststart = coststart
    self.inputs = T.matrix(name='inputs')

    if not numpy_rng:
        self.numpy_rng = numpy.random.RandomState(1)
    else:
        self.numpy_rng = numpy_rng

    if not theano_rng:
        theano_rng = RandomStreams(1)

    self.wxf_left = theano.shared(value=self.numpy_rng.normal(
        size=(numvis + numnote, numfac)).astype(theano.config.floatX) * 0.01,
        name='wxf_left')  # U
    self.wxf_right = theano.shared(value=self.numpy_rng.normal(
        size=(numvis + numnote, numfac)).astype(theano.config.floatX) * 0.01,
        name='wxf_right')  # V
    self.wv = theano.shared(value=self.numpy_rng.uniform(
        low=-0.01, high=+0.01,
        size=(numfac, numvel)).astype(theano.config.floatX), name='wv')  # W
    self.wvf_left = theano.shared(value=self.numpy_rng.uniform(
        low=-0.01, high=+0.01,
        size=(numvel, numvelfac)).astype(theano.config.floatX), name='wvf_left')
    self.wvf_right = theano.shared(value=self.numpy_rng.uniform(
        low=-0.01, high=+0.01,
        size=(numvel, numvelfac)).astype(theano.config.floatX), name='wvf_right')
    self.wa = theano.shared(value=self.numpy_rng.uniform(
        low=-0.01, high=+0.01,
        size=(numvelfac, numacc)).astype(theano.config.floatX), name='wa')
    self.waf_left = theano.shared(value=self.numpy_rng.uniform(
        low=-0.01, high=+0.01,
        size=(numacc, numaccfac)).astype(theano.config.floatX), name='waf_left')
    self.waf_right = theano.shared(value=self.numpy_rng.uniform(
        low=-0.01, high=+0.01,
        size=(numacc, numaccfac)).astype(theano.config.floatX), name='waf_right')
    self.wj = theano.shared(value=self.numpy_rng.uniform(
        low=-0.01, high=+0.01,
        size=(numaccfac, numjerk)).astype(theano.config.floatX), name='wj')
    self.bx = theano.shared(
        value=0.0 * numpy.ones(numvis + numnote, dtype=theano.config.floatX),
        name='bx')
    self.bv = theano.shared(
        value=0.0 * numpy.ones(numvel, dtype=theano.config.floatX), name='bv')
    self.ba = theano.shared(
        value=0.0 * numpy.ones(numacc, dtype=theano.config.floatX), name='ba')
    self.bj = theano.shared(
        value=0.0 * numpy.ones(numjerk, dtype=theano.config.floatX), name='bj')

    self.params = [self.wxf_left, self.wxf_right, self.wv,
                   self.wvf_left, self.wvf_right, self.wa,
                   self.waf_left, self.waf_right, self.wj,
                   self.bx, self.bv, self.ba, self.bj]

    self._inputframes = [None] * self.seq_len_to_predict
    self._inputframes_and_notebook = [None] * self.seq_len_to_predict
    self._xfactors_left = [None] * self.seq_len_to_predict
    self._xfactors_right = [None] * self.seq_len_to_predict
    self._vels = [None] * self.seq_len_to_predict
    self._accs = [None] * self.seq_len_to_predict
    self._prejerks = [None] * self.seq_len_to_predict
    self._recons_with_notebook = [None] * self.seq_len_to_predict

    # extract all input frames and project onto input/output filters:
    for t in range(self.seq_len_to_predict):
        if t < self.seq_len_to_train:
            self._inputframes[t] = self.inputs[:, t * numvis:(t + 1) * numvis]
        else:
            self._inputframes[t] = T.zeros(
                (self._inputframes[0].shape[0], self.numvis))

        if t > 3:
            if self.vis_corruption_type == 'zeromask':
                self._inputframes[t] = theano_rng.binomial(
                    size=self._inputframes[t].shape, n=1,
                    p=1.0 - self.vis_corruption_level,
                    dtype=theano.config.floatX) * self._inputframes[t]
            elif self.vis_corruption_type == 'mixedmask':
                self._inputframes[t] = theano_rng.binomial(
                    size=self._inputframes[t].shape, n=1,
                    p=1.0 - self.vis_corruption_level / 2,
                    dtype=theano.config.floatX) * self._inputframes[t]
                self._inputframes[t] = (1 - theano_rng.binomial(
                    size=self._inputframes[t].shape, n=1,
                    p=1.0 - self.vis_corruption_level / 2,
                    dtype=theano.config.floatX)) * self._inputframes[t]
            elif self.vis_corruption_type == 'gaussian':
                self._inputframes[t] = theano_rng.normal(
                    size=self._inputframes[t].shape, avg=0.0,
                    std=self.vis_corruption_level,
                    dtype=theano.config.floatX) + self._inputframes[t]
            else:
                assert False, "vis_corruption type not understood"

        self._inputframes_and_notebook[t] = T.concatenate(
            (self._inputframes[t],
             T.zeros((self._inputframes[t].shape[0], self.numnote))), 1)
        self._recons_with_notebook[t] = self._inputframes_and_notebook[t]

    for t in range(4, self.seq_len_to_predict):
        self._xfactors_left[t - 4] = T.dot(self._recons_with_notebook[t - 4], self.wxf_left)
        self._xfactors_right[t - 4] = T.dot(self._recons_with_notebook[t - 4], self.wxf_right)
        self._xfactors_left[t - 3] = T.dot(self._recons_with_notebook[t - 3], self.wxf_left)
        self._xfactors_right[t - 3] = T.dot(self._recons_with_notebook[t - 3], self.wxf_right)
        self._xfactors_left[t - 2] = T.dot(self._recons_with_notebook[t - 2], self.wxf_left)
        self._xfactors_right[t - 2] = T.dot(self._recons_with_notebook[t - 2], self.wxf_right)
        self._xfactors_left[t - 1] = T.dot(self._recons_with_notebook[t - 1], self.wxf_left)
        self._xfactors_right[t - 1] = T.dot(self._recons_with_notebook[t - 1], self.wxf_right)
        self._xfactors_left[t] = T.dot(self._recons_with_notebook[t], self.wxf_left)
        self._xfactors_right[t] = T.dot(self._recons_with_notebook[t], self.wxf_right)

        # re-infer current velocities v12 and v23:
        self._prevel01 = T.dot(self._xfactors_left[t - 4] * self._xfactors_right[t - 3], self.wv) + self.bv
        self._prevel12 = T.dot(self._xfactors_left[t - 3] * self._xfactors_right[t - 2], self.wv) + self.bv
        self._prevel23 = T.dot(self._xfactors_left[t - 2] * self._xfactors_right[t - 1], self.wv) + self.bv
        self._prevel34 = T.dot(self._xfactors_left[t - 1] * self._xfactors_right[t], self.wv) + self.bv

        # re-infer acceleration a123:
        self._preacc012 = T.dot(
            T.dot(T.nnet.sigmoid(self._prevel01), self.wvf_left) *
            T.dot(T.nnet.sigmoid(self._prevel12), self.wvf_right), self.wa) + self.ba
        self._preacc123 = T.dot(
            T.dot(T.nnet.sigmoid(self._prevel12), self.wvf_left) *
            T.dot(T.nnet.sigmoid(self._prevel23), self.wvf_right), self.wa) + self.ba
        self._preacc234 = T.dot(
            T.dot(T.nnet.sigmoid(self._prevel23), self.wvf_left) *
            T.dot(T.nnet.sigmoid(self._prevel34), self.wvf_right), self.wa) + self.ba

        if t == 4:
            self._prejerks[t - 1] = T.dot(
                T.dot(T.nnet.sigmoid(self._preacc012), self.waf_left) *
                T.dot(T.nnet.sigmoid(self._preacc123), self.waf_right), self.wj) + self.bj

        # infer jerk as weighted sum of past and re-inferred:
        self._prejerks[t] = self._prejerks[t - 1]

        # fill in all remaining activations from top-level jerk and past:
        self._accs[t] = T.dot(
            T.dot(T.nnet.sigmoid(self._prejerks[t]), self.wj.T) *
            T.dot(self._preacc123, self.waf_left), self.waf_right.T) + self.ba
        self._vels[t] = T.dot(
            T.dot(self._accs[t], self.wa.T) *
            T.dot(self._prevel23, self.wvf_left), self.wvf_right.T) + self.bv
        self._recons_with_notebook[t] = T.dot(
            T.dot(self._recons_with_notebook[t - 1], self.wxf_left) *
            T.dot(self._vels[t], self.wv.T), self.wxf_right.T) + self.bx

    self._prediction = T.concatenate(
        [pred[:, :self.numvis] for pred in self._recons_with_notebook], 1)
    self._notebook = T.concatenate(
        [pred[:, self.numvis:] for pred in self._recons_with_notebook], 1)

    if self.output_type == 'binary':
        self._prediction_for_training = T.concatenate(
            [T.nnet.sigmoid(pred[:, :self.numvis]) for pred in
             self._recons_with_notebook[self.coststart:self.seq_len_to_train]], 1)
    else:
        self._prediction_for_training = T.concatenate(
            [pred[:, :self.numvis] for pred in
             self._recons_with_notebook[self.coststart:self.seq_len_to_train]], 1)

    print(self.output_type)
    if self.output_type == 'real':
        self._cost = T.mean(
            (self._prediction_for_training -
             self.inputs[:, self.coststart * self.numvis:
                         self.seq_len_to_train * self.numvis]) ** 2)
    elif self.output_type == 'binary':
        self._cost = -T.mean(
            self.inputs[:, self.coststart * self.numvis:
                        self.seq_len_to_train * self.numvis] *
            T.log(self._prediction_for_training) +
            (1.0 - self.inputs[:, self.coststart * self.numvis:
                               self.seq_len_to_train * self.numvis]) *
            T.log(1.0 - self._prediction_for_training))

    self._grads = T.grad(self._cost, self.params)

    self.prediction = theano.function([self.inputs], self._prediction)
    self.notebook = theano.function([self.inputs], self._notebook)
    self.vels = [theano.function([self.inputs], v) for v in self._vels[4:]]
    self.accs = [theano.function([self.inputs], a) for a in self._accs[4:]]
    self.jerks = [theano.function([self.inputs], j) for j in self._prejerks[4:]]
    self.cost = theano.function([self.inputs], self._cost)
    self.grads = theano.function([self.inputs], self._grads)

    def get_cudandarray_value(x):
        if type(x) == theano.sandbox.cuda.CudaNdarray:
            return numpy.array(x.__array__()).flatten()
        else:
            return x.flatten()

    self.grad = lambda x: numpy.concatenate(
        [get_cudandarray_value(g) for g in self.grads(x)])
def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
             hidden_layers_sizes=[500, 500], n_outs=10,
             finetune_lr=0.1, input_x=None, label=None):
    self.sigmoid_layers = []
    self.rbm_layers = []
    self.params = []
    self.n_layers = len(hidden_layers_sizes)

    # wudi adds the mean and standard deviation of the activation values
    # to examine the neural net
    # Reference: Understanding the difficulty of training deep feedforward
    # neural networks, Xavier Glorot, Yoshua Bengio
    self.out_mean = []
    self.out_std = []

    assert self.n_layers > 0

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    # allocate symbolic variables for the data
    if input_x is None:
        self.x = T.matrix('x')  # the data is presented as rasterized images
    else:
        self.x = input_x
    if label is None:
        self.y = T.ivector('y')  # the labels are presented as a 1D vector of [int] labels
    else:
        self.y = label

    for i in xrange(self.n_layers):
        if i == 0:
            input_size = n_ins
            layer_input = self.x
        else:
            input_size = hidden_layers_sizes[i - 1]
            layer_input = self.sigmoid_layers[-1].output

        sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                    input=layer_input,
                                    n_in=input_size,
                                    n_out=hidden_layers_sizes[i],
                                    activation=T.nnet.sigmoid)
        # add the layer to our list of layers
        self.sigmoid_layers.append(sigmoid_layer)
        self.out_mean.append(T.mean(sigmoid_layer.output))
        self.out_std.append(T.std(sigmoid_layer.output))
        self.params.extend(sigmoid_layer.params)

        # Construct an RBM that shares weights with this layer
        if i == 0:
            rbm_layer = GBRBM(input=layer_input, n_in=input_size,
                              n_hidden=hidden_layers_sizes[i],
                              W=None, hbias=None, vbias=None,
                              numpy_rng=None, transpose=False,
                              activation=T.nnet.sigmoid, theano_rng=None,
                              name='grbm', W_r=None, dropout=0, dropconnect=0)
        else:
            rbm_layer = RBM(numpy_rng=numpy_rng,
                            theano_rng=theano_rng,
                            input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layers_sizes[i],
                            W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)
        self.rbm_layers.append(rbm_layer)

    # We now need to add a logistic layer on top of the MLP
    self.logLayer = LogisticRegression(
        input=self.sigmoid_layers[-1].output,
        n_in=hidden_layers_sizes[-1],
        n_out=n_outs)
    self.params.extend(self.logLayer.params)

    # compute the cost for the second phase of training, defined as the
    # negative log likelihood of the logistic regression (output) layer
    self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

    # compute the gradients with respect to the model parameters
    # symbolic variable that points to the number of errors made on the
    # minibatch given by self.x and self.y
    self.errors = self.logLayer.errors(self.y)

    #################################################
    # Wudi change the annealing learning rate:
    #################################################
    self.state_learning_rate = theano.shared(
        numpy.asarray(finetune_lr, dtype=theano.config.floatX), borrow=True)
# Carlos Morato, PhD.
# [email protected]
# Deep Learning for Advanced Robot Perception
#
# Naive LSTM to learn one-char to one-char mapping
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.utils import np_utils
from theano.tensor.shared_randomstreams import RandomStreams

# fix random seed for reproducibility
numpy.random.seed(7)
srng = RandomStreams(7)

# define the raw dataset
alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
# create mapping of characters to integers (0-25) and the reverse
char_to_int = dict((c, i) for i, c in enumerate(alphabet))
int_to_char = dict((i, c) for i, c in enumerate(alphabet))

# prepare the dataset of input to output pairs encoded as integers
seq_length = 1
dataX = []
dataY = []
for i in range(0, len(alphabet) - seq_length, 1):
    seq_in = alphabet[i:i + seq_length]
    seq_out = alphabet[i + seq_length]
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])
    print(seq_in, '->', seq_out)

# reshape X to be [samples, time steps, features]
X = numpy.reshape(dataX, (len(dataX), seq_length, 1))
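# The snippet above stops after reshaping X. As a hedged sketch (not part of
# the original file), a one-char-to-one-char model of this kind is typically
# finished by normalising X, one-hot encoding the targets, and fitting a small
# LSTM followed by a softmax Dense layer. The layer size, epoch count, and
# batch size below are illustrative assumptions.
X = X / float(len(alphabet))            # scale integer codes to [0, 1)
y = np_utils.to_categorical(dataY)      # one-hot encode the output characters

model = Sequential()
model.add(LSTM(32, input_shape=(X.shape[1], X.shape[2])))
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])
model.fit(X, y, epochs=500, batch_size=1, verbose=2)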
def __init__(self, numpy_rng, theano_rng=None,
             cfg=None,  # the network configuration
             dnn_shared=None, shared_layers=[], input=None):

    self.layers = []
    self.params = []
    self.delta_params = []

    self.cfg = cfg
    self.n_ins = cfg.n_ins
    self.n_outs = cfg.n_outs
    self.hidden_layers_sizes = cfg.hidden_layers_sizes
    self.hidden_layers_number = len(self.hidden_layers_sizes)
    self.activation = cfg.activation
    self.do_maxout = cfg.do_maxout
    self.pool_size = cfg.pool_size
    self.max_col_norm = cfg.max_col_norm
    self.l1_reg = cfg.l1_reg
    self.l2_reg = cfg.l2_reg
    self.non_updated_layers = cfg.non_updated_layers

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    # allocate symbolic variables for the data
    if input is None:
        self.x = T.matrix('x')
    else:
        self.x = input
    if cfg.multi_label is True:
        self.y = T.imatrix('y')
    else:
        self.y = T.ivector('y')

    for i in xrange(self.hidden_layers_number):
        # construct the hidden layer
        if i == 0:
            input_size = self.n_ins
            layer_input = self.x
        else:
            input_size = self.hidden_layers_sizes[i - 1]
            layer_input = self.layers[-1].output

        W = None
        b = None
        if i in shared_layers:
            W = dnn_shared.layers[i].W
            b = dnn_shared.layers[i].b

        if self.do_maxout:
            hidden_layer = HiddenLayer(rng=numpy_rng,
                                       input=layer_input,
                                       n_in=input_size,
                                       n_out=self.hidden_layers_sizes[i] * self.pool_size,
                                       W=W, b=b,
                                       activation=(lambda x: 1.0 * x),
                                       do_maxout=True,
                                       pool_size=self.pool_size)
        else:
            hidden_layer = HiddenLayer(rng=numpy_rng,
                                       input=layer_input,
                                       n_in=input_size,
                                       n_out=self.hidden_layers_sizes[i],
                                       W=W, b=b,
                                       activation=self.activation)
        # add the layer to our list of layers
        self.layers.append(hidden_layer)
        # if the layer index is included in self.non_updated_layers,
        # parameters of this layer will not be updated
        if i not in self.non_updated_layers:
            self.params.extend(hidden_layer.params)
            self.delta_params.extend(hidden_layer.delta_params)

    # We now need to add a logistic layer on top of the MLP
    self.logLayer = LogisticRegression(input=self.layers[-1].output,
                                       n_in=self.hidden_layers_sizes[-1],
                                       n_out=self.n_outs,
                                       multi_label=cfg.multi_label)

    if self.n_outs > 0:
        self.layers.append(self.logLayer)
        self.params.extend(self.logLayer.params)
        self.delta_params.extend(self.logLayer.delta_params)

    # compute the cost for the second phase of training,
    # defined as the negative log likelihood
    self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
    self.errors = self.logLayer.errors(self.y)

    if self.l1_reg is not None:
        for i in xrange(self.hidden_layers_number):
            W = self.layers[i].W
            self.finetune_cost += self.l1_reg * (abs(W).sum())

    if self.l2_reg is not None:
        for i in xrange(self.hidden_layers_number):
            W = self.layers[i].W
            self.finetune_cost += self.l2_reg * T.sqr(W).sum()
minibatch_size = 100
input_dim = 784
# number of hidden units in encoder (x -> z) network
encoder_hidden_dim = 500
# number of hidden units in decoder (z -> x) network
decoder_hidden_dim = 500
# number of latent variables
latent_dim = 2  # pairs of mu and sigma
# l2 regularization weight
lamda = 0.001
# learning rate
learning_rate = 0.02

# random number generator used for sampling latent variables
srng = RandomStreams(seed=123)

# input to the network
x = T.fmatrix(name='x')

# build the model
l_input = lasagne.layers.InputLayer(shape=(None, input_dim), input_var=x)
l_encoder_hidden = lasagne.layers.DenseLayer(
    l_input,
    num_units=encoder_hidden_dim,
    W=lasagne.init.Normal(0.01),
    b=lasagne.init.Normal(0.01),
    nonlinearity=lasagne.nonlinearities.tanh)
l_encoder_mu = lasagne.layers.DenseLayer(
def __init__(self, rng, x, n_in, n_h, p=0.0, training=0, rnn_batch_training=False):
    """ Initialise a gated recurrent unit

    :param rng: random state, fixed value for random state for reproducible objective results
    :param x: input to a network
    :param n_in: number of input features
    :type n_in: integer
    :param n_h: number of hidden units
    :type n_h: integer
    :param p: the probability of dropout
    :param training: a binary value to indicate training or testing (for dropout training)
    """
    self.n_in = int(n_in)
    self.n_h = int(n_h)

    self.rnn_batch_training = rnn_batch_training

    self.input = x

    if p > 0.0:
        if training == 1:
            srng = RandomStreams(seed=123456)
            self.input = T.switch(srng.binomial(size=x.shape, p=p), x, 0)
        else:
            self.input = (1 - p) * x

    self.W_xz = theano.shared(value=np.asarray(rng.normal(0.0, old_div(1.0, np.sqrt(n_in)), size=(n_in, n_h)), dtype=config.floatX), name='W_xz')
    self.W_hz = theano.shared(value=np.asarray(rng.normal(0.0, old_div(1.0, np.sqrt(n_h)), size=(n_h, n_h)), dtype=config.floatX), name='W_hz')
    self.W_xr = theano.shared(value=np.asarray(rng.normal(0.0, old_div(1.0, np.sqrt(n_in)), size=(n_in, n_h)), dtype=config.floatX), name='W_xr')
    self.W_hr = theano.shared(value=np.asarray(rng.normal(0.0, old_div(1.0, np.sqrt(n_h)), size=(n_h, n_h)), dtype=config.floatX), name='W_hr')
    self.W_xh = theano.shared(value=np.asarray(rng.normal(0.0, old_div(1.0, np.sqrt(n_in)), size=(n_in, n_h)), dtype=config.floatX), name='W_xh')
    self.W_hh = theano.shared(value=np.asarray(rng.normal(0.0, old_div(1.0, np.sqrt(n_h)), size=(n_h, n_h)), dtype=config.floatX), name='W_hh')

    self.b_z = theano.shared(value=np.zeros((n_h, ), dtype=config.floatX), name='b_z')
    self.b_r = theano.shared(value=np.zeros((n_h, ), dtype=config.floatX), name='b_r')
    self.b_h = theano.shared(value=np.zeros((n_h, ), dtype=config.floatX), name='b_h')

    if self.rnn_batch_training:
        self.h0 = theano.shared(value=np.zeros((1, n_h), dtype=config.floatX), name='h0')
        self.c0 = theano.shared(value=np.zeros((1, n_h), dtype=config.floatX), name='c0')

        self.h0 = T.repeat(self.h0, x.shape[1], 0)
        self.c0 = T.repeat(self.c0, x.shape[1], 0)
    else:
        self.h0 = theano.shared(value=np.zeros((n_h, ), dtype=config.floatX), name='h0')
        self.c0 = theano.shared(value=np.zeros((n_h, ), dtype=config.floatX), name='c0')

    ## pre-compute these for fast computation
    self.Wzx = T.dot(self.input, self.W_xz)
    self.Wrx = T.dot(self.input, self.W_xr)
    self.Whx = T.dot(self.input, self.W_xh)

    [self.h, self.c], _ = theano.scan(self.gru_as_activation_function,
                                      sequences=[self.Wzx, self.Wrx, self.Whx],
                                      outputs_info=[self.h0, self.c0])

    # self.output = self.h

    self.params = [self.W_xz, self.W_hz, self.W_xr, self.W_hr,
                   self.W_xh, self.W_hh,
                   self.b_z, self.b_r, self.b_h]

    self.L2_cost = (self.W_xz ** 2).sum() + (self.W_hz ** 2).sum() + \
                   (self.W_xr ** 2).sum() + (self.W_hr ** 2).sum() + \
                   (self.W_xh ** 2).sum() + (self.W_hh ** 2).sum()
def __init__(self, rng, x, n_in, n_h, n_out, p=0.0, training=0, rnn_batch_training=False):
    """ Initialise all the components in an LSTM block, including input gate,
    output gate, forget gate, and peephole connections

    :param rng: random state, fixed value for random state for reproducible objective results
    :param x: input to a network
    :param n_in: number of input features
    :type n_in: integer
    :param n_h: number of hidden units
    :type n_h: integer
    :param p: the probability of dropout
    :param training: a binary value to indicate training or testing (for dropout training)
    """
    self.input = x

    if p > 0.0:
        if training == 1:
            srng = RandomStreams(seed=123456)
            self.input = T.switch(srng.binomial(size=x.shape, p=p), x, 0)
        else:
            self.input = (1 - p) * x

    self.n_in = int(n_in)
    self.n_h = int(n_h)

    self.rnn_batch_training = rnn_batch_training

    # random initialisation
    Wx_value = np.asarray(rng.normal(0.0, old_div(1.0, np.sqrt(n_in)), size=(n_in, n_h)), dtype=config.floatX)
    Wh_value = np.asarray(rng.normal(0.0, old_div(1.0, np.sqrt(n_h)), size=(n_h, n_h)), dtype=config.floatX)
    Wc_value = np.asarray(rng.normal(0.0, old_div(1.0, np.sqrt(n_h)), size=(n_h, )), dtype=config.floatX)
    Wy_value = np.asarray(rng.normal(0.0, old_div(1.0, np.sqrt(n_out)), size=(n_out, n_h)), dtype=config.floatX)

    # Input gate weights
    self.W_xi = theano.shared(value=Wx_value, name='W_xi')
    self.W_hi = theano.shared(value=Wh_value, name='W_hi')
    self.w_ci = theano.shared(value=Wc_value, name='w_ci')
    self.W_yi = theano.shared(value=Wy_value, name='W_yi')

    # random initialisation
    Uh_value = np.asarray(rng.normal(0.0, old_div(1.0, np.sqrt(n_h)), size=(n_h, n_out)), dtype=config.floatX)

    # Output gate weights
    self.U_ho = theano.shared(value=Uh_value, name='U_ho')

    # random initialisation
    Wx_value = np.asarray(rng.normal(0.0, old_div(1.0, np.sqrt(n_in)), size=(n_in, n_h)), dtype=config.floatX)
    Wh_value = np.asarray(rng.normal(0.0, old_div(1.0, np.sqrt(n_h)), size=(n_h, n_h)), dtype=config.floatX)
    Wc_value = np.asarray(rng.normal(0.0, old_div(1.0, np.sqrt(n_h)), size=(n_h, )), dtype=config.floatX)

    # Forget gate weights
    self.W_xf = theano.shared(value=Wx_value, name='W_xf')
    self.W_hf = theano.shared(value=Wh_value, name='W_hf')
    self.w_cf = theano.shared(value=Wc_value, name='w_cf')

    # random initialisation
    Wx_value = np.asarray(rng.normal(0.0, old_div(1.0, np.sqrt(n_in)), size=(n_in, n_h)), dtype=config.floatX)
    Wh_value = np.asarray(rng.normal(0.0, old_div(1.0, np.sqrt(n_h)), size=(n_h, n_h)), dtype=config.floatX)
    Wc_value = np.asarray(rng.normal(0.0, old_div(1.0, np.sqrt(n_h)), size=(n_h, )), dtype=config.floatX)

    # Output gate weights
    self.W_xo = theano.shared(value=Wx_value, name='W_xo')
    self.W_ho = theano.shared(value=Wh_value, name='W_ho')
    self.w_co = theano.shared(value=Wc_value, name='w_co')

    # random initialisation
    Wx_value = np.asarray(rng.normal(0.0, old_div(1.0, np.sqrt(n_in)), size=(n_in, n_h)), dtype=config.floatX)
    Wh_value = np.asarray(rng.normal(0.0, old_div(1.0, np.sqrt(n_h)), size=(n_h, n_h)), dtype=config.floatX)
    Wc_value = np.asarray(rng.normal(0.0, old_div(1.0, np.sqrt(n_h)), size=(n_h, )), dtype=config.floatX)

    # Cell weights
    self.W_xc = theano.shared(value=Wx_value, name='W_xc')
    self.W_hc = theano.shared(value=Wh_value, name='W_hc')

    # bias
    self.b_i = theano.shared(value=np.zeros((n_h, ), dtype=config.floatX), name='b_i')
    self.b_f = theano.shared(value=np.zeros((n_h, ), dtype=config.floatX), name='b_f')
    self.b_o = theano.shared(value=np.zeros((n_h, ), dtype=config.floatX), name='b_o')
    self.b_c = theano.shared(value=np.zeros((n_h, ), dtype=config.floatX), name='b_c')
    self.b = theano.shared(value=np.zeros((n_out, ), dtype=config.floatX), name='b')

    ### make a layer

    # initial value of hidden and cell state
    if self.rnn_batch_training:
        self.h0 = theano.shared(value=np.zeros((1, n_h), dtype=config.floatX), name='h0')
        self.c0 = theano.shared(value=np.zeros((1, n_h), dtype=config.floatX), name='c0')
        self.y0 = theano.shared(value=np.zeros((1, n_out), dtype=config.floatX), name='y0')

        self.h0 = T.repeat(self.h0, x.shape[1], 0)
        self.c0 = T.repeat(self.c0, x.shape[1], 0)
        self.y0 = T.repeat(self.y0, x.shape[1], 0)
    else:
        self.h0 = theano.shared(value=np.zeros((n_h, ), dtype=config.floatX), name='h0')
        self.c0 = theano.shared(value=np.zeros((n_h, ), dtype=config.floatX), name='c0')
        self.y0 = theano.shared(value=np.zeros((n_out, ), dtype=config.floatX), name='y0')

    self.Wix = T.dot(self.input, self.W_xi)
    self.Wfx = T.dot(self.input, self.W_xf)
    self.Wcx = T.dot(self.input, self.W_xc)
    self.Wox = T.dot(self.input, self.W_xo)

    [self.h, self.c, self.y], _ = theano.scan(self.recurrent_fn,
                                              sequences=[self.Wix, self.Wfx, self.Wcx, self.Wox],
                                              outputs_info=[self.h0, self.c0, self.y0])

    self.output = self.y
def __init__(self, rng, x, n_in, n_h, n_out, p, training, rnn_batch_training=False):
    """ This is to initialise a standard RNN hidden unit

    :param rng: random state, fixed value for random state for reproducible objective results
    :param x: input data to current layer
    :param n_in: dimension of input data
    :param n_h: number of hidden units/blocks
    :param n_out: dimension of output data
    :param p: the probability of dropout
    :param training: a binary value to indicate training or testing (for dropout training)
    """
    self.input = x

    if p > 0.0:
        if training == 1:
            srng = RandomStreams(seed=123456)
            self.input = T.switch(srng.binomial(size=x.shape, p=p), x, 0)
        else:
            self.input = (1 - p) * x

    self.n_in = int(n_in)
    self.n_h = int(n_h)
    self.n_out = int(n_out)

    self.rnn_batch_training = rnn_batch_training

    # random initialisation
    Wx_value = np.asarray(rng.normal(0.0, old_div(1.0, np.sqrt(n_in)), size=(n_in, n_h)), dtype=config.floatX)
    Wh_value = np.asarray(rng.normal(0.0, old_div(1.0, np.sqrt(n_h)), size=(n_h, n_h)), dtype=config.floatX)
    Wy_value = np.asarray(rng.normal(0.0, old_div(1.0, np.sqrt(n_out)), size=(n_out, n_h)), dtype=config.floatX)
    Ux_value = np.asarray(rng.normal(0.0, old_div(1.0, np.sqrt(n_in)), size=(n_in, n_out)), dtype=config.floatX)
    Uh_value = np.asarray(rng.normal(0.0, old_div(1.0, np.sqrt(n_h)), size=(n_h, n_out)), dtype=config.floatX)
    Uy_value = np.asarray(rng.normal(0.0, old_div(1.0, np.sqrt(n_out)), size=(n_out, n_out)), dtype=config.floatX)

    # Input gate weights
    self.W_xi = theano.shared(value=Wx_value, name='W_xi')
    self.W_hi = theano.shared(value=Wh_value, name='W_hi')
    self.W_yi = theano.shared(value=Wy_value, name='W_yi')

    # Output gate weights
    self.U_xi = theano.shared(value=Ux_value, name='U_xi')
    self.U_hi = theano.shared(value=Uh_value, name='U_hi')
    self.U_yi = theano.shared(value=Uy_value, name='U_yi')

    # bias
    self.b_i = theano.shared(value=np.zeros((n_h, ), dtype=config.floatX), name='b_i')
    self.b = theano.shared(value=np.zeros((n_out, ), dtype=config.floatX), name='b')

    # initial value of hidden and cell state and output
    if self.rnn_batch_training:
        self.h0 = theano.shared(value=np.zeros((1, n_h), dtype=config.floatX), name='h0')
        self.c0 = theano.shared(value=np.zeros((1, n_h), dtype=config.floatX), name='c0')
        self.y0 = theano.shared(value=np.zeros((1, n_out), dtype=config.floatX), name='y0')

        self.h0 = T.repeat(self.h0, x.shape[1], 0)
        self.c0 = T.repeat(self.c0, x.shape[1], 0)
        self.y0 = T.repeat(self.y0, x.shape[1], 0)
    else:
        self.h0 = theano.shared(value=np.zeros((n_h, ), dtype=config.floatX), name='h0')
        self.c0 = theano.shared(value=np.zeros((n_h, ), dtype=config.floatX), name='c0')
        self.y0 = theano.shared(value=np.zeros((n_out, ), dtype=config.floatX), name='y0')

    self.Wix = T.dot(self.input, self.W_xi)

    [self.h, self.c, self.y], _ = theano.scan(self.recurrent_as_activation_function,
                                              sequences=[self.Wix],
                                              outputs_info=[self.h0, self.c0, self.y0])

    self.output = self.y

    self.params = [self.W_xi, self.W_hi, self.W_yi, self.U_hi, self.b_i, self.b]

    self.L2_cost = (self.W_xi ** 2).sum() + (self.W_hi ** 2).sum() + \
                   (self.W_yi ** 2).sum() + (self.U_hi ** 2).sum()
def __init__(
    self,
    numpy_rng,
    theano_rng=None,
    n_ins=784,
    hidden_layers_sizes=[500, 500],
    n_outs=10,
    corruption_levels=[0.1, 0.1]
):
    self.sigmoid_layers = []
    self.dA_layers = []
    self.params = []
    self.n_layers = len(hidden_layers_sizes)

    assert self.n_layers > 0

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    self.x = T.matrix('x')
    self.y = T.ivector('y')

    for i in range(self.n_layers):
        if i == 0:
            input_size = n_ins
        else:
            input_size = hidden_layers_sizes[i - 1]

        if i == 0:
            layer_input = self.x
        else:
            layer_input = self.sigmoid_layers[-1].output

        sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                    input=layer_input,
                                    n_in=input_size,
                                    n_out=hidden_layers_sizes[i],
                                    activation=T.nnet.sigmoid)
        self.sigmoid_layers.append(sigmoid_layer)
        self.params.extend(sigmoid_layer.params)

        dA_layer = dA(numpy_rng=numpy_rng,
                      theano_rng=theano_rng,
                      input=layer_input,
                      n_visible=input_size,
                      n_hidden=hidden_layers_sizes[i],
                      W=sigmoid_layer.W,
                      bhid=sigmoid_layer.b)
        self.dA_layers.append(dA_layer)

    self.logLayer = LogisticRegression(
        input=self.sigmoid_layers[-1].output,
        n_in=hidden_layers_sizes[-1],
        n_out=n_outs
    )
    self.params.extend(self.logLayer.params)

    self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
    self.errors = self.logLayer.errors(self.y)
import theano.tensor as T
from theano import function
from theano.tensor.shared_randomstreams import RandomStreams
import numpy

"""
Demo of how to define a function with a random variable.

Use case: when we want a function that involves a random variable,
for example to introduce minor corruptions in the inputs.
"""

random = RandomStreams(seed=42)
a = random.normal((1, 3))
b = T.dmatrix('b')
f = a * b
g = function([b], f)

print("Invocation1: ", g(numpy.ones((1, 3))))
print("Invocation2: ", g(numpy.ones((1, 3))))
print("Invocation3: ", g(numpy.ones((1, 3))))
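# A minimal sketch of the corruption use case mentioned in the docstring above:
# the same RandomStreams object can draw a binary mask that randomly zeroes
# entries of an input matrix. The names keep_prob, x_clean, and corrupt are
# illustrative assumptions, not part of the original demo.
keep_prob = 0.8
x_clean = T.dmatrix('x_clean')
# mask entries are 1 with probability keep_prob and 0 otherwise
mask = random.binomial(size=x_clean.shape, n=1, p=keep_prob, dtype='float64')
corrupt = function([x_clean], x_clean * mask)

print("Corrupted1: ", corrupt(numpy.ones((2, 4))))
print("Corrupted2: ", corrupt(numpy.ones((2, 4))))  # a new mask is drawn each call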