def __init__(self, base):
    if not base.tied_weights:
        raise ValueError("%s is not a tied-weights autoencoder" % str(base))
    # get_value(borrow=False) copies each buffer, so the untied parameters
    # do not alias base's parameters.
    self.weights = theano.shared(base.weights.get_value(borrow=False),
                                 name="weights")
    self.visbias = theano.shared(base.visbias.get_value(borrow=False),
                                 name="vb")
    self.hidbias = theano.shared(base.hidbias.get_value(borrow=False),
                                 name="hb")
    self.w_prime = theano.shared(base.weights.get_value(borrow=False).T,
                                 name="w_prime")
    self._params = [self.visbias, self.hidbias, self.weights, self.w_prime]
def multinomial_sampler(rstream, n=1, p=[0.5, 0.5], draw_shape=None, ndim=None,
                        dtype=theano.config.floatX):
    # Lift plain numbers/arrays into shared variables so they can appear in
    # the graph.
    if not isinstance(n, theano.Variable):
        n = theano.shared(numpy.asarray(n, dtype=int))
    if not isinstance(p, theano.Variable):
        p = theano.shared(numpy.asarray(p, dtype=theano.config.floatX))
    rstate = rstream.new_shared_rstate()
    new_rstate, out = tensor.raw_random.multinomial(rstate, draw_shape, n, p,
                                                    dtype=dtype)
    # Register the state update so each call to the compiled function draws
    # fresh samples.
    rstream.add_default_update(out, rstate, new_rstate)
    return out
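# Hedged usage sketch: stock Theano's RandomStreams does the rstate
# bookkeeping (new_shared_rstate / add_default_update above) internally.
# The seed and pvals below are illustrative values, not from the code above.
import theano
from theano.tensor.shared_randomstreams import RandomStreams

srng = RandomStreams(seed=234)
counts = srng.multinomial(n=10, pvals=[0.2, 0.3, 0.5])  # 10 trials, 3 outcomes
f_draw = theano.function([], counts)
print f_draw()  # e.g. [2 3 5]; the three counts always sum to 10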
def test_consistency_cpu_serial():
    '''Verify that the random numbers generated by mrg_uniform, serially,
    are the same as the reference (Java) implementation by L'Ecuyer et al.
    '''
    seed = 12345
    n_samples = 5
    n_streams = 12
    n_substreams = 7

    samples = []
    curr_rstate = numpy.array([seed] * 6, dtype='int32')

    for i in range(n_streams):
        stream_rstate = curr_rstate.copy()
        for j in range(n_substreams):
            rstate = theano.shared(numpy.array([stream_rstate.copy()],
                                               dtype='int32'))
            new_rstate, sample = rng_mrg.mrg_uniform.new(rstate, ndim=None,
                                                         dtype=config.floatX,
                                                         size=(1,))
            # Not really necessary, just mimicking
            # rng_mrg.MRG_RandomStreams' behavior
            sample.rstate = rstate
            sample.update = (rstate, new_rstate)
            rstate.default_update = new_rstate

            f = theano.function([], sample)
            for k in range(n_samples):
                s = f()
                samples.append(s)

            # next substream
            stream_rstate = rng_mrg.ff_2p72(stream_rstate)

        # next stream
        curr_rstate = rng_mrg.ff_2p134(curr_rstate)

    samples = numpy.array(samples).flatten()
    assert numpy.allclose(samples, java_samples)
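# In ordinary use the stream skipping above (ff_2p72 for substreams,
# ff_2p134 for streams) is handled by MRG_RandomStreams itself; a minimal
# sketch with an illustrative seed:
import theano
from theano.sandbox.rng_mrg import MRG_RandomStreams

srng = MRG_RandomStreams(seed=12345)
u = srng.uniform(size=(2, 3))  # uniform draws from the MRG31k3p generator
f_u = theano.function([], u)
print f_u()  # a fresh (2, 3) sample on every call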
def test_lookup():
    table_size = 1000
    feature_num = 500
    lookup_table_layer = LookupTableLayer(table_size, feature_num, 'test_lookup')
    input = theano.shared(np.asarray([[0, 1, 2, 3], [1, 2, 3, 4],
                                      [7, 8, 9, 10], [5, 6, 7, 8]],
                                     dtype=np.int32))

    output_flatten = lookup_table_layer.output(input)
    output_tensor = lookup_table_layer.output(input, tensor_output=True)
    flatten_shape = output_flatten.eval().shape
    tensor_shape = output_tensor.eval().shape
    assert flatten_shape == (4, 2000), \
        "flattened shape = {0}".format(flatten_shape)
    assert tensor_shape == (4, 4, 500), \
        "tensor shape = {0}".format(tensor_shape)

    #lookup_table_layer.save('/home/kingsfield/Data/models')
    #lookup_table_layer.load('/home/kingsfield/Data/models')
    # Round-trip the layer through cPickle and check its attributes survive.
    f = file('/home/kingsfield/Data/models/test.save', 'wb')
    cPickle.dump(lookup_table_layer, f, protocol=cPickle.HIGHEST_PROTOCOL)
    f.close()
    f = file('/home/kingsfield/Data/models/test.save', 'rb')
    test = cPickle.load(f)
    f.close()
    print test.name, test._table_size, test._feature_num
def __init__(self, base): """ .. todo:: WRITEME """ if not base.tied_weights: raise ValueError("%s is not a tied-weights autoencoder" % str(base)) self.weights = tensor.shared(base.weights.get_value(borrow=False), name='weights') self.visbias = tensor.shared(base.visbias.get_value(borrow=False), name='vb') self.hidbias = tensor.shared(base.visbias.get_value(borrow=False), name='hb') self.w_prime = tensor.shared(base.weights.get_value(borrow=False).T, name='w_prime') self._params = [self.visbias, self.hidbias, self.weights, self.w_prime]
def __init__(self, base):
    if not (isinstance(base, Autoencoder) and base.tied_weights):
        raise ValueError("%s is not a tied-weights autoencoder" % str(base))
    super(UntiedAutoencoder, self).__init__(
        nvis=base.nvis, nhid=base.nhid, act_enc=base.act_enc,
        act_dec=base.act_dec, tied_weights=True, irange=base.irange,
        rng=base.rng)
    # Overwrite the freshly initialized parameters with copies of base's,
    # decoder weights included, so they can now be trained independently.
    self.weights = theano.shared(base.weights.get_value(borrow=False),
                                 name='weights')
    self.visbias = theano.shared(base.visbias.get_value(borrow=False),
                                 name='vb')
    self.hidbias = theano.shared(base.hidbias.get_value(borrow=False),
                                 name='hb')
    self.w_prime = theano.shared(base.weights.get_value(borrow=False).T,
                                 name='w_prime')
    self._params = [self.visbias, self.hidbias, self.weights, self.w_prime]
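# Why get_value(borrow=False) above: it returns a copy of the underlying
# buffer, so the untied parameters can be trained without touching `base`.
# A small sketch with hypothetical names:
import numpy
import theano

w = theano.shared(numpy.zeros(2), name='w')
w_copy = theano.shared(w.get_value(borrow=False), name='w_copy')
w_copy.set_value(numpy.ones(2))
print w.get_value()  # still [ 0.  0.]: the copy is independent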
def __init_weights(self):
    self.Wio = shared(self.init_cls.weight_init(
        self.rng,
        size_x=self.n_in,
        size_y=self.n_out,
        sparsity=-1,
    ), name='Wio')
    self.params += [self.Wio]

    self.bo = shared(self.init_cls.bias_init(
        self.rng,
        size_x=self.n_out,
        sparsity=-1,
    ), name='bo')
    self.params += [self.bo]
def get_shared_by_name(in_list, name):
    '''Return the first shared variable in `in_list` whose name contains
    `name`; fall back to a fresh zero-valued shared variable.'''
    for ts in in_list:
        if ts.name and name in ts.name:
            return ts
    print("can't find", name)
    return theano.shared(np.float32('0'), name='Auto')
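# Illustrative usage of get_shared_by_name (parameter names are hypothetical):
import numpy as np
import theano

params = [theano.shared(np.zeros(3, dtype=np.float32), name='W_hid'),
          theano.shared(np.zeros(3, dtype=np.float32), name='b_hid')]
W = get_shared_by_name(params, 'W_hid')  # first param whose name contains 'W_hid'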
def test_neibs_grad():
    shape = (2, 3, 4, 4)
    images = theano.shared(
        numpy.arange(numpy.prod(shape), dtype='float32').reshape(shape))
    cost = T.sum(T.sqr(images2neibs(images, (2, 2))), axis=[0, 1])
    grad = T.grad(cost, images)
    f = theano.function([], [cost, grad], mode=mode_without_gpu)
    got = f()
    # d(sum of squares)/d(images) = 2 * images, so the expected gradient is
    # just the even numbers 0..190 arranged in the images' shape.
    should_get = [
        numpy.asarray(290320.0, dtype=numpy.float32),
        numpy.asarray(2 * numpy.arange(numpy.prod(shape)),
                      dtype=numpy.float32).reshape(shape),
    ]
    assert numpy.allclose(got[0], should_get[0])
    assert numpy.allclose(got[1], should_get[1])
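# What images2neibs computes on the forward pass, on a toy input (a sketch;
# the sandbox import path is assumed): each output row is one flattened
# non-overlapping 2x2 patch of the 4D image tensor.
import numpy
import theano
from theano.sandbox.neighbours import images2neibs

toy = theano.shared(numpy.arange(16, dtype='float32').reshape((1, 1, 4, 4)))
patches = images2neibs(toy, (2, 2))
print theano.function([], patches)()  # shape (4, 4): four 2x2 patches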
def __init__(self, input, n_in, n_out, prob_drop=0.5):
    self.prob_drop = prob_drop
    self.prob_keep = 1.0 - prob_drop
    self.flag_on = theano.shared(np.cast[theano.config.floatX](1.0))
    self.flag_off = 1.0 - self.flag_on

    seed_this = DropoutLayer.seed_common.randint(0, sys.maxint)
    mask_rng = theano.tensor.shared_randomstreams.RandomStreams(seed_this)
    self.mask = mask_rng.binomial(n=1, p=self.prob_keep, size=input.shape)

    self.output = \
        self.flag_on * T.cast(self.mask, theano.config.floatX) * input + \
        self.flag_off * self.prob_keep * input

    DropoutLayer.layers.append(self)
    print 'dropout layer with P_drop: ' + str(self.prob_drop)
def compute(self, input, params):
    # We need to be able to turn the dropout on and off (on for training,
    # off for testing), so use a shared variable to control the current
    # dropout state. Start in the "on" state by default.
    self.dropout_on = theano.shared(numpy.cast[theano.config.floatX](1.0),
                                    borrow=True)

    # Create a random stream to generate a random mask of 0 and 1 activations.
    seed = DropoutLayer.__dropout_seed_srng.randint(0, sys.maxint)
    srng = theano.tensor.shared_randomstreams.RandomStreams(seed)

    # p = 1 - p because 1's indicate "keep" and p is the probability of
    # dropping.
    self.mask = srng.binomial(n=1, p=1.0 - self.prob, size=input.shape)

    # When dropout is off, activations must be multiplied by the average
    # "on" probability (i.e. 1 - p).
    off_gain = (1.0 - self.prob)

    # The cast in the following expression is important:
    # int * float32 = float64, which pulls things off the GPU.
    self.output = input * self.dropout_on * T.cast(self.mask, theano.config.floatX) + \
        off_gain * input * (1.0 - self.dropout_on)
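# Hedged sketch of flipping the flag at train/test time; `layers` holding
# instances with a `dropout_on` attribute is hypothetical:
import numpy
import theano

def set_dropout(layers, on):
    # 1.0 -> apply the random mask; 0.0 -> rescale activations by 1 - p.
    value = numpy.cast[theano.config.floatX](1.0 if on else 0.0)
    for layer in layers:
        layer.dropout_on.set_value(value)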
def test_lookup():
    table_size = 1000
    feature_num = 500
    lookup_table_layer = LookupTableLayer(table_size, feature_num)
    input = theano.shared(np.asarray([[0, 1, 2, 3], [1, 2, 3, 4],
                                      [7, 8, 9, 10], [5, 6, 7, 8]],
                                     dtype=np.int32))
    output_tensor = lookup_table_layer.output(input, tensor_output=True)
    tensor_shape = output_tensor.eval().shape
    assert tensor_shape == (4, 4, 500), \
        "lookup table output tensor shape = {0}".format(tensor_shape)

    rng = np.random.RandomState(1234)
    conv1d_layer = Conv1DLayer("test", rng, 1, 100, 10)
    conv_output = conv1d_layer.output(output_tensor.dimshuffle(0, 'x', 1, 2))
    conv_out_shape = conv_output.eval().shape
    assert conv_out_shape == (4, 100, 4, 491), \
        "conv1d output tensor shape = {0}".format(conv_out_shape)

    batch_size = conv_out_shape[0]
    sentence_len = conv_out_shape[2]
    re_organized = conv_output.dimshuffle(0, 2, 1, 3).reshape(
        (batch_size, sentence_len, -1))
    re_organized_shape = re_organized.eval().shape
    assert re_organized_shape == (4, 4, 100 * 491), \
        "reorganized output shape = {0}".format(re_organized_shape)
def matrix(n, m):
    return theano.shared(np.zeros((n, m)))
def shared(self, x):
    return theano.shared(x)
f_out = function([i], out, mode=Mode(linker='cvm'))
theano.printing.debugprint(f_out, print_type=True)
print f_out(3)
print f_out(0)
print f_out(-1)

dout_di = theano.grad(out, i)
f_grad = theano.function([i], dout_di, mode=Mode(linker='cvm'))
theano.printing.debugprint(f_grad, print_type=True)
print f_grad(3)
print f_grad(0)
print f_grad(-1)

# One way it might be possible (but maybe not a good idea) would be to add a
# special case in theano.grad, so that if an intermediate gradient has the
# form og = ifelse(c, DisconnectedType, ig), we backpropagate using
# ifelse(c, DisconnectedType, op.grad(ig)) instead of op.grad(og).

# "Does it make sense to get the row by just a single number? Even if it
# does, for the sake of consistency I would use Matlab syntax."
# "Does it make sense to take the dot product between a row and a matrix?"

from theano import function
a = theano.shared(np.random.randn(3, 3, 3))
b = theano.shared(np.random.randn(3, 3, 3))
f = T.abs_
reduce(lambda x, y: x + f(y).sum(), [a, b], 0)
aa = T.grad(reduce(lambda x, y: x + f(y).sum(), [a, b], 0), [a, b])
function([], aa)()  # This should equal [-1, 1, 1, -1, etc.]
y_time = tt.iscalar()
y_seq_id = tt.iscalar()
y = X[0:y_time, y_seq_id]
f = theano.function([X, y_time, y_seq_id], y)
exit(0)

def step(xx, a):
    return xx

x = theano.shared(np.random.randn(10, 1, 1))
xf, _ = theano.scan(step, sequences=x, non_sequences=[1], go_backwards=False)
xb, _ = theano.scan(step, sequences=x, non_sequences=[0], go_backwards=True)
xb = xb[::-1]  # reversing the backward scan should reproduce the forward one
diff = (xb - xf).norm(2)
f = theano.function([], [x, xf, xb, diff])
print f()
exit(0)

x = theano.shared(np.random.randn(10, 5))
f = theano.function([], x[::-1, :])
from nn.Conv2DLayer import Conv2DLayer
from nn.Pool2DLayer import Pool2DLayer
from nn.NoiseLayer import NoiseLayer
from nn.Network import Network
from nn.AdamTrainer import AdamTrainer
from nn.ReshapeLayer import ReshapeLayer
from utils import load_data

rng = np.random.RandomState(23455)

dataset = '../data/mnist/mnist.pkl.gz'
datasets = load_data(dataset)

shared = lambda d: theano.shared(d, borrow=True)

train_set_x, train_set_y = map(shared, datasets[0])
valid_set_x, valid_set_y = map(shared, datasets[1])
test_set_x, test_set_y = map(shared, datasets[2])

batchsize = 1

train_set_x = train_set_x.reshape((50000, 1, 28, 28))
valid_set_x = valid_set_x.reshape((10000, 1, 28, 28))
test_set_x = test_set_x.reshape((10000, 1, 28, 28))

network = Network(
    NoiseLayer(rng, 0.3),
    Conv2DLayer(rng, (4, 1, 5, 5), (batchsize, 1, 28, 28)),
def matrix(n: int, m: int) -> TensorVariable:
    return theano.shared(np.zeros((n, m)))
def __init__(self, input_layer, output_imshape):
    self.input_layer = input_layer
    self.output_imshape = output_imshape

    # One coordinate grid per image axis, stored as float32 shared variables.
    self.x_coords, self.y_coords = np.indices(output_imshape)
    self.x_coords = theano.shared(self.x_coords.astype(np.float32))
    self.y_coords = theano.shared(self.y_coords.astype(np.float32))
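# What np.indices produces above, on a small demo shape: one coordinate grid
# per axis, so x_coords[i, j] == i and y_coords[i, j] == j.
import numpy as np

x_demo, y_demo = np.indices((2, 3))
print x_demo  # [[0 0 0]
              #  [1 1 1]]
print y_demo  # [[0 1 2]
              #  [0 1 2]]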
conv.connect(emb, rng, ())

X = tt.tensor3()
y_time = tt.iscalar()
y_seq_id = tt.iscalar()
y = X[0:y_time, y_seq_id]
f = theano.function([X, y_time, y_seq_id], y)
exit(0)

def step(xx, a):
    return xx

x = theano.shared(np.random.randn(10, 1, 1))
xf, _ = theano.scan(step, sequences=x, non_sequences=[1], go_backwards=False)
xb, _ = theano.scan(step, sequences=x, non_sequences=[0], go_backwards=True)
xb = xb[::-1]  # reversing the backward scan should reproduce the forward one
diff = (xb - xf).norm(2)
f = theano.function([], [x, xf, xb, diff])
print f()