def __init__(self, first_W):
    self.log_regression = LogisticRegression(first_W)
    st = T.dvector('st')
    ac = T.dvector('ac')
    z = ac * ac
    self.q_ = th.function(
        inputs=[st, ac],
        outputs=[self.log_regression.cost(
            T.concatenate([ac, z, st, ac[:-1] * st[:-1]]))])
def dtw(array1, array2):
    """
    Accepts: two one-dimensional arrays.
    Returns: (float) DTW distance between them.
    """
    # Cost matrix with a large sentinel value along the first row and column
    s = np.zeros((array1.size + 1, array2.size + 1))
    s[:, 0] = 1e6
    s[0, :] = 1e6
    s[0, 0] = 0.0

    # Set up symbolic variables
    square = T.dmatrix('square')
    vec1 = T.dvector('vec1')
    vec2 = T.dvector('vec2')
    vec1_length = T.dscalar('vec1_length')
    vec2_length = T.dscalar('vec2_length')
    outer_loop = T.arange(vec1_length, dtype='int64')
    inner_loop = T.arange(vec2_length, dtype='int64')

    # Run the outer loop (the scan step function `outer` is defined elsewhere)
    path, _ = scan(fn=outer,
                   outputs_info=[dict(initial=square, taps=[-1])],
                   non_sequences=[inner_loop, vec1, vec2],
                   sequences=outer_loop)

    # Compile the function
    theano_square = function([vec1, vec2, square, vec1_length, vec2_length],
                             path, on_unused_input='warn')

    # Call the compiled function and return the actual distance
    return theano_square(array1, array2, s, array1.size,
                         array2.size)[-1][array1.size, array2.size]
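# Hypothetical call sketch for dtw() above; it assumes numpy (np), theano's
# scan/function, and the `outer` step function used by dtw are in scope.
import numpy as np

seq_a = np.array([0.0, 1.0, 2.0, 3.0])
seq_b = np.array([0.0, 1.0, 1.0, 2.0, 3.0])
print(dtw(seq_a, seq_b))   # DTW distance as a float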
def __init__(self, input=tensor.dvector('input'), target=tensor.dvector('target'), n_input=1, n_hidden=1, n_output=1, lr=1e-3, **kw): super(NNet, self).__init__(**kw) self.input = input self.target = target self.lr = shared(lr, 'learning_rate') self.w1 = shared(numpy.zeros((n_hidden, n_input)), 'w1') self.w2 = shared(numpy.zeros((n_output, n_hidden)), 'w2') # print self.lr.type self.hidden = sigmoid(tensor.dot(self.w1, self.input)) self.output = tensor.dot(self.w2, self.hidden) self.cost = tensor.sum((self.output - self.target)**2) self.sgd_updates = { self.w1: self.w1 - self.lr * tensor.grad(self.cost, self.w1), self.w2: self.w2 - self.lr * tensor.grad(self.cost, self.w2)} self.sgd_step = pfunc( params=[self.input, self.target], outputs=[self.output, self.cost], updates=self.sgd_updates) self.compute_output = pfunc([self.input], self.output) self.output_from_hidden = pfunc([self.hidden], self.output)
def UV(U=Th.dmatrix('U'), V1=Th.dvector('V1'), V2=Th.dvector('V2'), **result):
    '''Reparameterize theta and M as a function of U, V1 and V2.'''
    result['theta'] = Th.dot(U.T, V1)
    result['M'] = Th.dot(V1 * U.T, (V2 * U.T).T)
    return result
def test_loss_updates_one_layer_positive_features_with_negative_weights_relu(self): n_vis = 4 n_hid = 2 hidden_layer = HiddenLayer(n_vis=n_vis, n_hid=n_hid, layer_name='h', activation='relu', param_init_range=0, alpha=0) hidden_layer.W.set_value(np.ones((n_vis, n_hid)) * -1) mlp = QNetwork([hidden_layer], discount=1, learning_rate=1) features = T.dvector('features') action = T.lscalar('action') reward = T.dscalar('reward') next_features = T.dvector('next_features') loss, updates = mlp.get_loss_and_updates(features, action, reward, next_features) train = theano.function( [features, action, reward, next_features], outputs=loss, updates=updates, mode='FAST_COMPILE') features = [1,1,1,1] action = 0 reward = 1 next_features = [1,1,1,1] actual_loss = train(features, action, reward, next_features) expected_loss = 0.5 actual_weights = mlp.layers[0].W.eval().tolist() expected_weights = [[-1,-1], [-1,-1], [-1,-1], [-1,-1]] self.assertEqual(actual_loss, expected_loss) self.assertSequenceEqual(actual_weights, expected_weights)
def LQLEP_wBarrier( LQLEP = Th.dscalar(), ldet = Th.dscalar(), v1 = Th.dvector(), N_spike = Th.dscalar(), ImM = Th.dmatrix(), U = Th.dmatrix(), V2 = Th.dvector(), u = Th.dvector(), C = Th.dmatrix(), **other): ''' The actual Linear-Quadratic-Exponential-Poisson log-likelihood, as a function of theta and M, with a barrier on the log-det term and a prior. ''' sq_nonlinearity = V2**2.*Th.sum( Th.dot(U,C)*U, axis=[1]) #Th.sum(U**2,axis=[1]) nonlinearity = V2 * Th.sqrt( Th.sum( Th.dot(U,C)*U, axis=[1])) #Th.sum(U**2,axis=[1]) ) if other.has_key('uc'): LQLEP_wPrior = LQLEP + 0.5 * N_spike * ( 1./(ldet+250.)**2. \ - 0.000001 * Th.sum(Th.log(1.-4*sq_nonlinearity))) \ + 10. * Th.sum( (u[2:]+u[:-2]-2*u[1:-1])**2. ) \ + 10. * Th.sum( (other['uc'][2:]+other['uc'][:-2]-2*other['uc'][1:-1])**2. ) \ + 0.000000001 * Th.sum( v1**2. ) # + 100. * Th.sum( v1 ) # + 0.0001*Th.sum( V2**2 ) else: LQLEP_wPrior = LQLEP + 0.5 * N_spike * ( 1./(ldet+250.)**2. \ - 0.000001 * Th.sum(Th.log(1.-4*sq_nonlinearity))) \ + 10. * Th.sum( (u[2:]+u[:-2]-2*u[1:-1])**2. ) \ + 0.000000001 * Th.sum( v1**2. ) # + 100. * Th.sum( v1 ) # + 0.0001*Th.sum( V2**2 ) eigsImM,barrier = eig( ImM ) barrier = 1-(Th.sum(Th.log(eigsImM))>-250) * \ (Th.min(eigsImM)>0) * (Th.max(4*sq_nonlinearity)<1) other.update(locals()) return named( **other )
def neural_net( x=T.dmatrix(), #our points, one point per row y=T.dmatrix(), #our targets w=T.dmatrix(), #first layer weights b=T.dvector(), #first layer bias v=T.dmatrix(), #second layer weights c=T.dvector(), #second layer bias step=T.dscalar(), #step size for gradient descent l2_coef=T.dscalar() #l2 regularization amount ): """Idea A: """ hid = T.tanh(T.dot(x, w) + b) pred = T.dot(hid, v) + c sse = T.sum((pred - y) * (pred - y)) w_l2 = T.sum(T.sum(w*w)) v_l2 = T.sum(T.sum(v*v)) loss = sse + l2_coef * (w_l2 + v_l2) def symbolic_params(cls): return [cls.w, cls.b, cls.v, cls.c] def update(cls, x, y, **kwargs): params = cls.symbolic_params() gp = T.grad(cls.loss, params) return [], [In(p, update=p - cls.step * g) for p,g in zip(params, gp)] def predict(cls, x, **kwargs): return cls.pred, [] return locals()
def test_loss_updates_one_layer_positive_relu(self): n_vis = 4 n_hid = 2 hidden_layer = HiddenLayer(n_vis=n_vis, n_hid=n_hid, layer_name='h', activation='relu', param_init_range=0, alpha=0) # W = theano.shared(value=np.ones((n_vis, n_hid)), name='h_W', borrow=True) # hidden_layer.W = W mlp = QNetwork([hidden_layer], discount=1, learning_rate=1) features = T.dvector('features') action = T.lscalar('action') reward = T.dscalar('reward') next_features = T.dvector('next_features') loss, updates = mlp.get_loss_and_updates(features, action, reward, next_features) train = theano.function( [features, action, reward, next_features], outputs=loss, updates=updates, mode='FAST_COMPILE') features = [1,1,1,1] action = 0 reward = 1 next_features = [1,1,1,1] actual_loss = train(features, action, reward, next_features) expected_loss = 0.5 actual_weights = list(mlp.layers[0].W.eval()) expected_weights = [[1,0], [1,0], [1,0], [1,0]] self.assertEqual(actual_loss, expected_loss) self.assertTrue(np.array_equal(actual_weights, expected_weights))
def theano_setup(self): W = T.dmatrix('W') b = T.dvector('b') c = T.dvector('c') x = T.dmatrix('x') s = T.dot(x, W) + c # h = 1 / (1 + T.exp(-s)) # h = T.nnet.sigmoid(s) h = T.tanh(s) # r = T.dot(h,W.T) + b # r = theano.printing.Print("r=")(2*T.tanh(T.dot(h,W.T) + b)) ract = T.dot(h,W.T) + b r = self.output_scaling_factor * T.tanh(ract) #g = function([W,b,c,x], h) #f = function([W,b,c,h], r) #fg = function([W,b,c,x], r) # Another variable to be able to call a function # with a noisy x and compare it to a reference x. y = T.dmatrix('y') all_losses = ((r - y)**2) loss = T.sum(all_losses) #loss = ((r - y)**2).sum() self.theano_encode_decode = function([W,b,c,x], r) self.theano_all_losses = function([W,b,c,x,y], [all_losses, T.abs_(s), T.abs_(ract)]) self.theano_gradients = function([W,b,c,x,y], [T.grad(loss, W), T.grad(loss, b), T.grad(loss, c)])
def test_0(): N = 16*1000*10*1 if 1: aval = abs(numpy.random.randn(N).astype('float32'))+.1 bval = numpy.random.randn(N).astype('float32') a = T.fvector() b = T.fvector() else: aval = abs(numpy.random.randn(N))+.1 bval = numpy.random.randn(N) a = T.dvector() b = T.dvector() f = theano.function([a,b], T.pow(a,b), mode='LAZY') theano_opencl.elemwise.swap_impls=False g = theano.function([a,b], T.pow(a,b), mode='LAZY') print 'ocl time', timeit.Timer(lambda: f(aval, bval)).repeat(3,3) print 'gcc time', timeit.Timer(lambda: g(aval, bval)).repeat(3,3) print 'numpy time', timeit.Timer(lambda: aval**bval).repeat(3,3) assert ((f(aval, bval) - aval**bval)**2).sum() < 1.1 assert ((g(aval, bval) - aval**bval)**2).sum() < 1.1
def test_uniform_vector(self): random = RandomStreams(utt.fetch_seed()) low = tensor.dvector() high = tensor.dvector() out = random.uniform(low=low, high=high) assert out.ndim == 1 f = function([low, high], out) low_val = [.1, .2, .3] high_val = [1.1, 2.2, 3.3] seed_gen = numpy.random.RandomState(utt.fetch_seed()) numpy_rng = numpy.random.RandomState(int(seed_gen.randint(2**30))) # Arguments of size (3,) val0 = f(low_val, high_val) numpy_val0 = numpy_rng.uniform(low=low_val, high=high_val) print('THEANO', val0) print('NUMPY', numpy_val0) assert numpy.all(val0 == numpy_val0) # arguments of size (2,) val1 = f(low_val[:-1], high_val[:-1]) numpy_val1 = numpy_rng.uniform(low=low_val[:-1], high=high_val[:-1]) print('THEANO', val1) print('NUMPY', numpy_val1) assert numpy.all(val1 == numpy_val1) # Specifying the size explicitly g = function([low, high], random.uniform(low=low, high=high, size=(3,))) val2 = g(low_val, high_val) numpy_rng = numpy.random.RandomState(int(seed_gen.randint(2**30))) numpy_val2 = numpy_rng.uniform(low=low_val, high=high_val, size=(3,)) assert numpy.all(val2 == numpy_val2) self.assertRaises(ValueError, g, low_val[:-1], high_val[:-1])
def __init__(self,N,Nsub,NRGC,prior=1): self.N = N self.Nsub = Nsub self.NRGC = NRGC U = Th.dmatrix() # SYMBOLIC variables # V1 = Th.dvector() # V2 = Th.dvector() # STA = Th.dvector() # STC = Th.dmatrix() # theta = Th.dot( U.T , V1 ) # UV1U = Th.dot( U , theta ) # UV1V2U= Th.dot( V1 * U.T , (V2 * U.T).T ) # posterior = -0.5 * Th.sum( V1 * V2 * U.T*U.T ) \ -0.25* Th.sum( UV1V2U.T * UV1V2U ) \ -0.5 * Th.sum( UV1U * UV1U * UV1U *V2 *V2 * V1 ) \ -0.5 * Th.sum( UV1U * UV1U * V2 * V1 ) \ -0.5 * Th.sum( theta * theta ) \ + Th.dot( theta.T , STA ) \ + Th.sum( Th.dot( V1* V2*U.T , U ) \ * (STC + STA.T*STA) ) dpost_dU = Th.grad( cost = posterior , # wrt = U ) # dpost_dV1 = Th.grad( cost = posterior , # wrt = V1 ) # dpost_dV2 = Th.grad( cost = posterior , # wrt = V2 ) # # self.posterior = function( [U,V2,V1,STA,STC], UV1V2U) # self.posterior = function( [U,V2,V1,STA,STC], posterior) # self.dpost_dU = function( [U,V2,V1,STA,STC], dpost_dU ) # self.dpost_dV1 = function( [U,V2,V1,STA,STC], dpost_dV1 ) # self.dpost_dV2 = function( [U,V2,V1,STA,STC], dpost_dV2 ) #
def Pretrain(sda, data, loops, rate): L = 0 R = 0 input = T.dvector() through = theano.function( inputs = [input], outputs = input) for lvl in xrange(sda.n_layers-1): train = sda.getTrainingFunc(lvl,lvl+1) for loop in xrange(loops*len(data[0])): p0 = random.randint(0, len(data[0])-1) p1 = random.randint(0, len(data[1])-1) patch0 = numpy.log(abs(0.7*data[0][p0] + 0.3*data[1][p1])**2+1)/20.0*0.8+0.1 patch1 = numpy.log(abs(data[0][p0])**2+1)/20.0*0.8+0.1 patch1 /= numpy.dot(patch1, patch1) # plt.subplot(211) # plt.imshow(patch0.reshape((5,128))) # plt.subplot(212) # plt.imshow(patch1.reshape((5,128))) # plt.show() l,r = train(through(patch1), through(patch1), rate, 0.05) L = L + l R = R + r if loop%500 == 499: print lvl, loop, ':', 10*numpy.log10(0.75**2/(L/500.0/len(data[0][0]))), R/500.0 L = 0 R = 0 input = T.dvector() through = theano.function( inputs = [input], outputs = sda.goThrough(input, 0, lvl+1) )
def test_normal_vector(self): random = RandomStreams(utt.fetch_seed()) avg = tensor.dvector() std = tensor.dvector() out = random.normal(avg=avg, std=std) assert out.ndim == 1 f = function([avg, std], out) avg_val = [1, 2, 3] std_val = [.1, .2, .3] seed_gen = numpy.random.RandomState(utt.fetch_seed()) numpy_rng = numpy.random.RandomState(int(seed_gen.randint(2**30))) # Arguments of size (3,) val0 = f(avg_val, std_val) numpy_val0 = numpy_rng.normal(loc=avg_val, scale=std_val) assert numpy.allclose(val0, numpy_val0) # arguments of size (2,) val1 = f(avg_val[:-1], std_val[:-1]) numpy_val1 = numpy_rng.normal(loc=avg_val[:-1], scale=std_val[:-1]) assert numpy.allclose(val1, numpy_val1) # Specifying the size explicitly g = function([avg, std], random.normal(avg=avg, std=std, size=(3,))) val2 = g(avg_val, std_val) numpy_rng = numpy.random.RandomState(int(seed_gen.randint(2**30))) numpy_val2 = numpy_rng.normal(loc=avg_val, scale=std_val, size=(3,)) assert numpy.allclose(val2, numpy_val2) self.assertRaises(ValueError, g, avg_val[:-1], std_val[:-1])
def test_multilayer_sparse(self): # fixed parameters bsize = 10 # batch size imshp = (5,5) kshp = ((3,3),(2,2)) nkerns = (10,20) # per output pixel ssizes = ((1,1),(2,2)) convmodes = ('full','valid',) # symbolic stuff kerns = [tensor.dvector(),tensor.dvector()] input = tensor.dmatrix() rng = numpy.random.RandomState(3423489) # build actual input images img2d = numpy.arange(bsize*numpy.prod(imshp)).reshape((bsize,)+imshp) img1d = img2d.reshape(bsize,-1) for mode in ('FAST_COMPILE','FAST_RUN'): for conv_mode in convmodes: for ss in ssizes: l1hid, l1outshp = sp.applySparseFilter(kerns[0], kshp[0],\ nkerns[0], input, imshp, ss, mode=conv_mode) l2hid, l2outshp = sp.applySparseFilter(kerns[1], kshp[1],\ nkerns[1], l1hid, l1outshp, ss, mode=conv_mode) l1propup = function([kerns[0], input], l1hid, mode=mode) l2propup = function([kerns[1], l1hid], l2hid, mode=mode) # actual values l1kernvals = numpy.arange(numpy.prod(l1outshp)*numpy.prod(kshp[0])) l2kernvals = numpy.arange(numpy.prod(l2outshp)*numpy.prod(kshp[1])*nkerns[0]) l1hidval = l1propup(l1kernvals,img1d) l2hidval = l2propup(l2kernvals,l1hidval)
def __init__(self, sizes, input_dim, output_dim): self.layers = len(sizes) + 1 in_dim = [input_dim] + sizes out_dim = sizes + [output_dim] x = T.dvector('x') y = T.dvector('y') self.hyp_params = [] for i, (r,c) in enumerate(zip(in_dim,out_dim)): if i == 0: obj = HiddenLayer(x, r, c) else: obj = HiddenLayer(obj.output,r,c) self.hyp_params.append(obj.params) yhat = obj.output prediction = T.argmax(yhat) self.predict = theano.function([x],[yhat]) o_error = T.sum(T.sqr(yhat - y)) # o_error = T.sum(T.nnet.categorical_crossentropy(yhat, y)) updates = [] learning_rate = T.scalar('learning_rate') for param in self.hyp_params: updates.append((param['W'], param['W'] - learning_rate * T.grad(o_error,param['W']))) updates.append((param['b'], param['b'] - learning_rate * T.grad(o_error,param['b']))) self.train_step = theano.function([x,y,learning_rate],[o_error], updates = updates)
def test_optimize_xent_vector2(self): verbose = 0 mode = theano.compile.mode.get_default_mode() if mode == theano.compile.mode.get_mode('FAST_COMPILE'): mode = 'FAST_RUN' rng = numpy.random.RandomState(utt.fetch_seed()) x_val = rng.randn(5) b_val = rng.randn(5) y_val = numpy.asarray([2]) x = T.dvector('x') b = T.dvector('b') y = T.lvector('y') def print_graph(func): for i, node in enumerate(func.maker.fgraph.toposort()): print i, node # Last node should be the output print i, printing.pprint(node.outputs[0]) print ## Test that a biased softmax is optimized correctly bias_expressions = [ T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])), -T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])), -T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]), T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])] for expr in bias_expressions: f = theano.function([x, b, y], expr, mode=mode) if verbose: print_graph(f) try: prev, last = f.maker.fgraph.toposort()[-2:] assert len(f.maker.fgraph.toposort()) == 3 # [big_op, sum, dim_shuffle] f(x_val, b_val, y_val) except Exception: theano.printing.debugprint(f) raise backup = config.warn.sum_div_dimshuffle_bug config.warn.sum_div_dimshuffle_bug = False try: g = theano.function([x, b, y], T.grad(expr, x), mode=mode) finally: config.warn.sum_div_dimshuffle_bug = backup if verbose: print_graph(g) try: ops = [node.op for node in g.maker.fgraph.toposort()] assert len(ops) <= 6 assert crossentropy_softmax_1hot_with_bias_dx in ops assert softmax_with_bias in ops assert softmax_grad not in ops g(x_val, b_val, y_val) except Exception: theano.printing.debugprint(g) raise
def make_minimizer(Model):
    L, y = T.ivector('L'), T.dvector('y')
    mu, eps = T.dscalar('mu'), T.dscalar('eps')
    R, eta = T.dtensor3('R'), T.dvector('eta')
    model = Model(L, y, mu, R, eta, eps)
    return theano.function([L, y, mu, R, eta, eps], model.minimize())
def init_propagate_function(self):
    x = T.dvector()
    y = T.dmatrix()
    b = T.dvector()
    z = T.dot(x, y) + b
    f = theano.function([x, y, b], z)
    return f
def test_profiling(): old1 = theano.config.profile old2 = theano.config.profile_memory try: theano.config.profile = True theano.config.profile_memory = True x = T.dvector("x") y = T.dvector("y") z = x + y p = theano.ProfileStats(False) if theano.config.mode in ["DebugMode", "DEBUG_MODE"]: m = "FAST_RUN" else: m = None f = theano.function([x, y], z, profile=p, name="test_profiling", mode=m) output = f([1, 2, 3, 4], [1, 1, 1, 1]) buf = StringIO.StringIO() f.profile.summary(buf) finally: theano.config.profile = old1 theano.config.profile_memory = old2
def theano_setup(self):
    # The matrices Wb and Wc were originally tied.
    # Because of that, I decided to keep Wb and Wc with
    # the same shape (instead of being transposed) to
    # avoid disturbing the code as much as possible.
    Wb = T.dmatrix('Wb')
    Wc = T.dmatrix('Wc')
    b = T.dvector('b')
    c = T.dvector('c')
    s = T.dscalar('s')
    x = T.dmatrix('x')

    h_act = T.dot(x, Wc) + c
    if self.act_func[0] == 'tanh':
        h = T.tanh(h_act)
    elif self.act_func[0] == 'sigmoid':
        h = T.nnet.sigmoid(h_act)
    elif self.act_func[0] == 'id':
        # bad idea
        h = h_act
    else:
        raise ValueError("Invalid act_func[0]")

    r_act = T.dot(h, Wb.T) + b
    if self.act_func[1] == 'tanh':
        r = s * T.tanh(r_act)
    elif self.act_func[1] == 'sigmoid':
        r = s * T.nnet.sigmoid(r_act)
    elif self.act_func[1] == 'id':
        r = s * r_act
    else:
        raise ValueError("Invalid act_func[1]")

    # Another variable to be able to call a function
    # with a noisy x and compare it to a reference x.
    y = T.dmatrix('y')
    loss = ((r - y)**2)
    sum_loss = T.sum(loss)

    # theano_encode_decode : vectorial function in argument X.
    # theano_loss          : vectorial function in argument X.
    # theano_gradients     : returns a tuple of gradients, each of which
    #                        involves all the data X summed, so it's not
    #                        a "vectorial" function.
    self.theano_encode_decode = function([Wb, Wc, b, c, s, x], r)
    self.theano_loss = function([Wb, Wc, b, c, s, x, y], loss)
    self.theano_gradients = function([Wb, Wc, b, c, s, x, y],
                                     [T.grad(sum_loss, Wb), T.grad(sum_loss, Wc),
                                      T.grad(sum_loss, b), T.grad(sum_loss, c),
                                      T.grad(sum_loss, s)])

    # Other useful theano functions for the experiments that involve
    # adding noise to the hidden states.
    self.theano_encode = function([Wc, c, x], h)
    self.theano_decode = function([Wb, b, s, h], r)
def eigs(theta=Th.dvector('theta'), M=Th.dmatrix('M'),
         STA=Th.dvector('STA'), STC=Th.dmatrix('STC'), **other):
    '''Return eigenvalues of I-sym(M), for display/debugging purposes.'''
    ImM = Th.identity_like(M) - (M + M.T) / 2
    w, v = eig(ImM)
    return w
def ldet(theta=Th.dvector('theta'), M=Th.dmatrix('M'),
         STA=Th.dvector('STA'), STC=Th.dmatrix('STC'), **other):
    '''Return log-det of I-sym(M), for display/debugging purposes.'''
    ImM = Th.identity_like(M) - (M + M.T) / 2
    w, v = eig(ImM)
    return Th.sum(Th.log(w))
def RGC_LE(subunit_out=Th.dmatrix(), v1=Th.dvector(), spikes=Th.dvector(), **other):
    unnormalized = Th.exp(Th.dot(subunit_out.T, v1).T)
    rgc_out = unnormalized * Th.sum(spikes) / Th.sum(unnormalized)
    other.update(locals())
    return named(**other)
def LNP(theta=Th.dvector(), STA=Th.dvector(), N_spike=Th.dscalar(),
        C=Th.dmatrix(), **other):
    '''LNP log-likelihood, as a function of theta. Minimizer is the STA.'''
    LNP = N_spike * (0.5 * Th.sum(Th.dot(C, theta) * theta)
                     - Th.sum(theta * STA))
    other.update(locals())
    return named(**other)
def LQLEP_input(**other):
    theta = Th.dvector()
    M = Th.dmatrix()
    STA = Th.dvector()
    STC = Th.dmatrix()
    N_spike = Th.dscalar()
    Cm1 = Th.dmatrix()
    other.update(locals())
    return named(**other)
def theanoVecVecMul(In1, In2, opt):
    var1 = T.dvector('var1')
    var2 = T.dvector('var2')
    if opt == 'M':
        var3 = T.dot(var1, var2)   # 'M': inner (dot) product
    else:
        var3 = T.mul(var1, var2)   # otherwise: elementwise product
    mul_vec = function([var1, var2], var3)
    return mul_vec(In1, In2)
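# Usage sketch for theanoVecVecMul (values are illustrative): 'M' selects
# the dot product, any other opt value the elementwise product.
print(theanoVecVecMul([1., 2., 3.], [4., 5., 6.], 'M'))   # 32.0
print(theanoVecVecMul([1., 2., 3.], [4., 5., 6.], 'E'))   # [ 4. 10. 18.]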
def setup(self, bottom, top):
    import theano.tensor as T
    import theano
    x = T.dvector('x')
    v = T.dvector('v')
    y = x * 2
    yg = T.Lop(y, x, v)
    self.f = theano.function([x], y)
    self.b = theano.function([x, v], yg, on_unused_input='warn')
def UV12(U=Th.dmatrix(), V1=Th.dmatrix(), V2=Th.dvector(),
         STAs=Th.dmatrix(), STCs=Th.dtensor3(), N_spikes=Th.dvector(), **other):
    other.update(locals())
    return named(**other)
def theg3():
    w = T.dmatrix('weights')                # matrix of 64-bit (double) floats
    v = T.dvector('upstream activations')   # vector of 64-bit (double) floats
    b = T.dvector('biases')
    x = T.dot(v, w) + b                     # T.dot = tensor dot product
    x.name = 'integrated signals'
    f = theano.function([v, w, b], x)
    ppth(x, graph=True)
    return f
def prepare_theano(gfs, runidx=0, dtype='float64'):
    theano_rts = tt.vector('durations_%i' % runidx, dtype=dtype)
    theano_stts = tt.vector('starttimes_%i' % runidx, dtype=dtype)
    theano_slips = tt.dvector('slips_%i' % runidx)
    gfs.init_optimization()
    return theano_rts, theano_stts, theano_slips
def test_sparse(): print '\n\n*************************************************' print ' TEST SPARSE' print '*************************************************' # fixed parameters bsize = 10 # batch size imshp = (28, 28) kshp = (5, 5) nkern = 1 # per output pixel ssizes = ((1, 1), (2, 2)) convmodes = ( 'full', 'valid', ) # symbolic stuff bias = T.dvector() kerns = T.dvector() input = T.dmatrix() rng = N.random.RandomState(3423489) import theano.gof as gof #Mode(optimizer='fast_run', linker=gof.OpWiseCLinker(allow_gc=False)),): ntot, ttot = 0, 0 for conv_mode in convmodes: for ss in ssizes: output, outshp = sp.applySparseFilter(kerns, kshp,\ nkern, input, imshp, ss, bias=bias, mode=conv_mode) f = function([kerns, bias, input], output) # build actual input images img2d = N.arange(bsize * N.prod(imshp)).reshape((bsize, ) + imshp) img1d = img2d.reshape(bsize, -1) zeropad_img = N.zeros((bsize,\ img2d.shape[1]+2*(kshp[0]-1),\ img2d.shape[2]+2*(kshp[1]-1))) zeropad_img[:, kshp[0] - 1:kshp[0] - 1 + img2d.shape[1], kshp[1] - 1:kshp[1] - 1 + img2d.shape[2]] = img2d # build kernel matrix -- flatten it for theano stuff filters = N.arange(N.prod(outshp)*N.prod(kshp)).\ reshape(nkern,N.prod(outshp[1:]),N.prod(kshp)) spfilt = filters.flatten() biasvals = N.arange(N.prod(outshp)) # compute output by hand ntime1 = time.time() refout = N.zeros((bsize, nkern, outshp[1], outshp[2])) patch = N.zeros((kshp[0], kshp[1])) for b in xrange(bsize): for k in xrange(nkern): pixi = 0 # pixel index in raster order for j in xrange(outshp[1]): for i in xrange(outshp[2]): n = j * ss[0] m = i * ss[1] patch = zeropad_img[b, n:n + kshp[0], m:m + kshp[1]] refout[b,k,j,i] = N.dot(filters[k,pixi,:],\ patch.flatten()) pixi += 1 refout = refout.reshape(bsize, -1) + biasvals ntot += time.time() - ntime1 # need to flatten images ttime1 = time.time() out1 = f(spfilt, biasvals, img1d) ttot += time.time() - ttime1 temp = refout - out1 assert (temp < 1e-10).all() # test downward propagation vis = T.grad(output, input, output) downprop = function([kerns, output], vis) temp1 = time.time() for zz in range(100): visval = downprop(spfilt, out1) indices, indptr, spmat_shape, sptype, outshp, kmap = \ sp.convolution_indices.sparse_eval(imshp,kshp,nkern,ss,conv_mode) spmat = sparse.csc_matrix((spfilt[kmap], indices, indptr), spmat_shape) visref = N.dot(out1, spmat.todense()) assert N.all(visref == visval) print '**** Sparse Profiling Results ****' print 'Numpy processing time: ', ntot print 'Theano processing time: ', ttot
def test_gc_never_pickles_temporaries(): x = T.dvector() for i in xrange(2): # TODO: 30 causes like LONG compilation due to MERGE if i: r = r + r / 10 else: r = x optimizer = None optimizer = 'fast_run' for f_linker, g_linker in [(theano.PerformLinker(allow_gc=True), theano.PerformLinker(allow_gc=False)), (theano.OpWiseCLinker(allow_gc=True), theano.OpWiseCLinker(allow_gc=False))]: # f_linker has garbage collection # g_linker has no garbage collection f = theano.function([x], r, mode=theano.Mode(optimizer=optimizer, linker=f_linker)) g = theano.function([x], r, mode=theano.Mode(optimizer=optimizer, linker=g_linker)) pre_f = pickle.dumps(f) pre_g = pickle.dumps(g) len_pre_f = len(pre_f) len_pre_g = len(pre_g) # We can't compare the content or the length of the string # between f and g. 2 reason, we store some timming information # in float. They won't be the same each time. Different float # can have different lenght when printed. def a(fn): return len(pickle.dumps(fn.maker)) assert a(f) == a(f) # some sanity checks on the pickling mechanism assert a(g) == a(g) # some sanity checks on the pickling mechanism def b(fn): return len( pickle.dumps( theano.compile.function_module._pickle_Function(fn))) assert b(f) == b(f) # some sanity checks on the pickling mechanism def c(fn): return len(pickle.dumps(fn)) assert c(f) == c(f) # some sanity checks on the pickling mechanism assert c(g) == c(g) # some sanity checks on the pickling mechanism # now run the function once to create temporaries within the no-gc # linker f(numpy.ones(100, dtype='float64')) g(numpy.ones(100, dtype='float64')) # serialize the functions again post_f = pickle.dumps(f) post_g = pickle.dumps(g) len_post_f = len(post_f) len_post_g = len(post_g) # assert that f() didn't cause the function to grow # allow_gc should leave the function un-changed by calling assert len_pre_f == len_post_f, (len_pre_f, len_post_f) # assert that g() didn't cause g to grow because temporaries # that weren't collected shouldn't be pickled anyway # Allow for a couple of bytes of difference, since timing info, # for instance, can be represented as text of varying size. assert abs(len_post_f - len_post_g) < 256, (f_linker, len_post_f, len_post_g)
import theano
import theano.tensor as T
import numpy as np


def compute_accuracy(y_target, y_predict):
    correct_prediction = np.equal(y_predict, y_target)
    accuracy = np.sum(correct_prediction) / len(correct_prediction)
    return accuracy


rng = np.random
N = 400
feats = 784
D = (rng.randn(N, feats), rng.randint(size=N, low=0, high=2))

x = T.dmatrix('x')
y = T.dvector('y')
W = theano.shared(rng.randn(feats), name='w')
b = theano.shared(0.1, name='b')

p_1 = T.nnet.sigmoid(T.dot(x, W) + b)                 # probability of class 1
prediction = p_1 > 0.5
xent = -y * T.log(p_1) - (1 - y) * T.log(1 - p_1)     # cross-entropy
cost = xent.mean() + 0.01 * (W ** 2).sum()            # mean loss + L2 penalty
gW, gb = T.grad(cost, [W, b])

learning_rate = 0.1
train = theano.function(
    inputs=[x, y],
    outputs=[prediction, xent.mean()],
    updates=[(W, W - learning_rate * gW), (b, b - learning_rate * gb)])
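# A minimal training-loop sketch using the objects defined above; the step
# count and reporting interval are illustrative, not from the source.
for step in range(500):
    pred, err = train(D[0], D[1])
    if step % 50 == 0:
        print('cross-entropy:', err,
              'accuracy:', compute_accuracy(D[1], pred))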
def test_infer_shape(self): rng_R = random_state_type() rng_R_val = numpy.random.RandomState(utt.fetch_seed()) # no shape specified, default args post_r, out = uniform(rng_R) self._compile_and_check([rng_R], [out], [rng_R_val], RandomFunction) post_r, out = uniform(rng_R, size=None, ndim=2) self._compile_and_check([rng_R], [out], [rng_R_val], RandomFunction) """ #infer_shape don't work for multinomial. #The parameter ndim_added is set to 1 and in this case, the infer_shape #inplementation don't know how to infer the shape post_r, out = multinomial(rng_R) self._compile_and_check([rng_R], [out], [rng_R_val], RandomFunction) """ # no shape specified, args have to be broadcasted low = tensor.TensorType(dtype='float64', broadcastable=(False, True, True))() high = tensor.TensorType(dtype='float64', broadcastable=(True, True, True, False))() post_r, out = uniform(rng_R, size=None, ndim=2, low=low, high=high) low_val = [[[3]], [[4]], [[-5]]] high_val = [[[[5, 8]]]] self._compile_and_check([rng_R, low, high], [out], [rng_R_val, low_val, high_val], RandomFunction) # multinomial, specified shape """ #infer_shape don't work for multinomial n = iscalar() pvals = dvector() size_val = (7, 3) n_val = 6 pvals_val = [0.2] * 5 post_r, out = multinomial(rng_R, size=size_val, n=n, pvals=pvals, ndim=2) self._compile_and_check([rng_R, n, pvals], [out], [rng_R_val, n_val, pvals_val], RandomFunction) """ # uniform vector low and high low = dvector() high = dvector() post_r, out = uniform(rng_R, low=low, high=1) low_val = [-5, .5, 0, 1] self._compile_and_check([rng_R, low], [out], [rng_R_val, low_val], RandomFunction) low_val = [.9] self._compile_and_check([rng_R, low], [out], [rng_R_val, low_val], RandomFunction) post_r, out = uniform(rng_R, low=low, high=high) low_val = [-4., -2] high_val = [-1, 0] self._compile_and_check([rng_R, low, high], [out], [rng_R_val, low_val, high_val], RandomFunction) low_val = [-4.] high_val = [-1] self._compile_and_check([rng_R, low, high], [out], [rng_R_val, low_val, high_val], RandomFunction) # uniform broadcasting low and high low = dvector() high = dcol() post_r, out = uniform(rng_R, low=low, high=high) low_val = [-5, .5, 0, 1] high_val = [[1.]] self._compile_and_check([rng_R, low, high], [out], [rng_R_val, low_val, high_val], RandomFunction) low_val = [.9] high_val = [[1.], [1.1], [1.5]] self._compile_and_check([rng_R, low, high], [out], [rng_R_val, low_val, high_val], RandomFunction) low_val = [-5, .5, 0, 1] high_val = [[1.], [1.1], [1.5]] self._compile_and_check([rng_R, low, high], [out], [rng_R_val, low_val, high_val], RandomFunction) # uniform with vector slice low = dvector() high = dvector() post_r, out = uniform(rng_R, low=low, high=high) low_val = [.1, .2, .3] high_val = [1.1, 2.2, 3.3] size_val = (3, ) self._compile_and_check([rng_R, low, high], [out], [rng_R_val, low_val[:-1], high_val[:-1]], RandomFunction) # uniform with explicit size and size implicit in parameters # NOTE 1: Would it be desirable that size could also be supplied # as a Theano variable? post_r, out = uniform(rng_R, size=size_val, low=low, high=high) self._compile_and_check([rng_R, low, high], [out], [rng_R_val, low_val, high_val], RandomFunction) # binomial with vector slice n = ivector() prob = dvector() post_r, out = binomial(rng_R, n=n, p=prob) n_val = [1, 2, 3] prob_val = [.1, .2, .3] size_val = (3, ) self._compile_and_check([rng_R, n, prob], [out], [rng_R_val, n_val[:-1], prob_val[:-1]], RandomFunction) # binomial with explicit size and size implicit in parameters # cf. 
NOTE 1 post_r, out = binomial(rng_R, n=n, p=prob, size=size_val) self._compile_and_check([rng_R, n, prob], [out], [rng_R_val, n_val, prob_val], RandomFunction) # normal with vector slice avg = dvector() std = dvector() post_r, out = normal(rng_R, avg=avg, std=std) avg_val = [1, 2, 3] std_val = [.1, .2, .3] size_val = (3, ) self._compile_and_check([rng_R, avg, std], [out], [rng_R_val, avg_val[:-1], std_val[:-1]], RandomFunction) # normal with explicit size and size implicit in parameters # cf. NOTE 1 post_r, out = normal(rng_R, avg=avg, std=std, size=size_val) self._compile_and_check([rng_R, avg, std], [out], [rng_R_val, avg_val, std_val], RandomFunction) # multinomial with tensor-3 probabilities """
import theano import theano.tensor as tt import numpy as np import starry import matplotlib.pyplot as plt import pytest map = starry.Map(ydeg=1, reflected=True) _b = tt.dvector("b") _theta = tt.dvector("theta") _bo = tt.dvector("bo") _ro = tt.dscalar("ro") _sigr = tt.dscalar("sigr") _s = theano.function([_b, _theta, _bo, _ro, _sigr], map.ops.sT(_b, _theta, _bo, _ro, _sigr)) def s(b, theta, bo, ro, sigr, n=0): if hasattr(ro, "__len__"): assert not (hasattr(b, "__len__") or hasattr(theta, "__len__") or hasattr(bo, "__len__") or hasattr(sigr, "__len__")) return [ _s([b], [theta], [bo], ro[i], sigr)[0, n] for i in range(len(ro)) ] elif hasattr(sigr, "__len__"): assert not (hasattr(b, "__len__") or hasattr(theta, "__len__") or hasattr(bo, "__len__") or hasattr(ro, "__len__")) return [ _s([b], [theta], [bo], ro, sigr[i])[0, n] for i in range(len(sigr)) ] else:
def _get_compiled_theano_functions(N_QUAD_PTS): # Planet masses: m1,m2 m1, m2 = T.dscalars(2) mstar = 1 mu1 = m1 * mstar / (mstar + m1) mu2 = m2 * mstar / (mstar + m2) eta1 = mstar + m1 eta2 = mstar + m2 beta1 = mu1 * T.sqrt(eta1 / mstar) / (mu1 + mu2) beta2 = mu2 * T.sqrt(eta2 / mstar) / (mu1 + mu2) j, k = T.lscalars('jk') s = (j - k) / k # Angle variable for averaging over psi = T.dvector('psi') # Quadrature weights quad_weights = T.dvector('w') # Dynamical variables: Ndof = 3 Nconst = 1 dyvars = T.vector() s1, s2, phi, I1, I2, Phi, dRtilde = [ dyvars[i] for i in range(2 * Ndof + Nconst) ] a20 = T.constant(1.) a10 = ((j - k) / j)**(2 / 3) * (eta1 / eta2)**(1 / 3) * a20 L10 = beta1 * T.sqrt(a10) L20 = beta2 * T.sqrt(a20) Psi = s * L20 + (1 + s) * L10 Rtilde = dRtilde - L10 - L20 #### # angles #### rtilde = T.constant(0.) Omega = -1 * rtilde l1 = phi + k * (1 + s) * psi + Omega l2 = phi + k * s * psi + Omega gamma1 = s1 - phi - Omega gamma2 = s2 - phi - Omega q1 = 0.5 * np.pi - Omega q2 = -0.5 * np.pi - Omega pomega1 = -1 * gamma1 pomega2 = -1 * gamma2 Omega1 = -1 * q1 Omega2 = -1 * q2 omega1 = pomega1 - Omega1 omega2 = pomega2 - Omega2 ### # actions ### Gamma1 = I1 Gamma2 = I2 L1 = Psi / k - s * (I1 + I2) - s * Phi L2 = -1 * Psi / k + (1 + s) * (I1 + I2) + (1 + s) * Phi Cz = -1 * Rtilde R = L1 + L2 - Gamma1 - Gamma2 - Cz G1 = L1 - Gamma1 G2 = L2 - Gamma2 r2_by_r1 = (L2 - L1 - Gamma2 + Gamma1) / (L1 + L2 - Gamma1 - Gamma2 - R) rho1 = 0.5 * R * (1 + r2_by_r1) rho2 = 0.5 * R * (1 - r2_by_r1) a1 = (L1 / beta1)**2 e1 = T.sqrt(1 - (1 - (Gamma1 / L1))**2) a2 = (L2 / beta2)**2 e2 = T.sqrt(1 - (1 - (Gamma2 / L2))**2) cos_inc1 = 1 - rho1 / G1 cos_inc2 = 1 - rho2 / G2 inc1 = T.arccos(cos_inc1) inc2 = T.arccos(cos_inc2) Hkep = -0.5 * T.sqrt(eta1) * beta1 / a1 - 0.5 * T.sqrt(eta2) * beta2 / a2 ko = KeplerOp() M1 = l1 - pomega1 M2 = l2 - pomega2 sinf1, cosf1 = ko(M1, e1 + T.zeros_like(M1)) sinf2, cosf2 = ko(M2, e2 + T.zeros_like(M2)) # n1 = T.sqrt(eta1 / mstar) * a1**(-3 / 2) n2 = T.sqrt(eta2 / mstar) * a2**(-3 / 2) Hint_dir, Hint_ind, r1, r2, v1, v2 = calc_Hint_components_sinf_cosf( a1, a2, e1, e2, inc1, inc2, omega1, omega2, Omega1, Omega2, n1, n2, sinf1, cosf1, sinf2, cosf2) eps = m1 * m2 / (mu1 + mu2) / T.sqrt(mstar) Hpert = (Hint_dir + Hint_ind / mstar) Hpert_av = Hpert.dot(quad_weights) Htot = Hkep + eps * Hpert_av ##################################################### # Set parameters for compiling functions with Theano ##################################################### # Get numerical quadrature nodes and weights nodes, weights = np.polynomial.legendre.leggauss(N_QUAD_PTS) # Rescale for integration interval from [-1,1] to [-pi,pi] nodes = nodes * np.pi weights = weights * 0.5 # 'givens' will fix some parameters of Theano functions compiled below givens = [(psi, nodes), (quad_weights, weights)] # 'ins' will set the inputs of Theano functions compiled below # Note: 'extra_ins' will be passed as values of object attributes # of the 'ResonanceEquations' class 'defined below extra_ins = [m1, m2, j, k] ins = [dyvars] + extra_ins orbels = [a1, e1, inc1, k * s1, a2, e2, inc2, k * s2, phi, Omega] orbels_dict = dict( zip([ 'a1', 'e1', 'inc1', 'theta1', 'a2', 'e2', 'inc2', 'theta2', 'phi' ], orbels)) actions = [L1, L2, Gamma1, Gamma2, rho1, rho2] actions_dict = dict( zip(['L1', 'L2', 'Gamma1', 'Gamma2', 'Q1', 'Q2'], actions)) # Conservative flow gradHtot = T.grad(Htot, wrt=dyvars) hessHtot = theano.gradient.hessian(Htot, wrt=dyvars) Jtens = T.as_tensor( np.pad(_get_Omega_matrix(Ndof), (0, Nconst), 
'constant')) H_flow_vec = Jtens.dot(gradHtot) H_flow_jac = Jtens.dot(hessHtot) ########################## # Compile Theano functions ########################## orbels_fn = theano.function(inputs=ins, outputs=orbels_dict, givens=givens, on_unused_input='ignore') actions_fn = theano.function(inputs=ins, outputs=actions_dict, givens=givens, on_unused_input='ignore') Rtilde_fn = theano.function(inputs=ins, outputs=Rtilde, givens=givens, on_unused_input='ignore') Htot_fn = theano.function(inputs=ins, outputs=Htot, givens=givens, on_unused_input='ignore') Hpert_fn = theano.function(inputs=ins, outputs=Hpert_av, givens=givens, on_unused_input='ignore') Hpert_components_fn = theano.function( inputs=ins, outputs=[Hint_dir.dot(quad_weights), Hint_ind.dot(quad_weights)], givens=givens, on_unused_input='ignore') H_flow_vec_fn = theano.function(inputs=ins, outputs=H_flow_vec, givens=givens, on_unused_input='ignore') H_flow_jac_fn = theano.function(inputs=ins, outputs=H_flow_jac, givens=givens, on_unused_input='ignore') return dict({ 'orbital_elements': orbels_fn, 'actions': actions_fn, 'Rtilde': Rtilde_fn, 'Hamiltonian': Htot_fn, 'Hpert': Hpert_fn, 'Hpert_components': Hpert_components_fn, 'Hamiltonian_flow': H_flow_vec_fn, 'Hamiltonian_flow_jacobian': H_flow_jac_fn })
def test_scan_debugprint3(): coefficients = theano.tensor.dvector("coefficients") max_coefficients_supported = 10 k = tensor.iscalar("k") A = tensor.dvector("A") # compute A**k def compute_A_k(A, k): # Symbolic description of the result result, updates = theano.scan(fn=lambda prior_result, A: prior_result * A, outputs_info=tensor.ones_like(A), non_sequences=A, n_steps=k) A_k = result[-1] return A_k # Generate the components of the polynomial components, updates = theano.scan(fn=lambda coefficient, power, some_A, some_k: coefficient * (compute_A_k(some_A, some_k) ** power), outputs_info=None, sequences=[ coefficients, theano.tensor.arange( max_coefficients_supported)], non_sequences=[A, k]) # Sum them up polynomial = components.sum() final_result = polynomial output_str = theano.printing.debugprint(final_result, file='str') lines = [] for line in output_str.split('\n'): lines += [line] expected_output = """Sum{acc_dtype=float64} [@A] '' |for{cpu,scan_fn} [@B] '' |Elemwise{minimum,no_inplace} [@C] '' | |Subtensor{int64} [@D] '' | | |Shape [@E] '' | | | |Subtensor{int64::} [@F] 'coefficients[0:]' | | | |coefficients [@G] | | | |Constant{0} [@H] | | |Constant{0} [@I] | |Subtensor{int64} [@J] '' | |Shape [@K] '' | | |Subtensor{int64::} [@L] '' | | |ARange [@M] '' | | | |TensorConstant{0} [@N] | | | |TensorConstant{10} [@O] | | | |TensorConstant{1} [@P] | | |Constant{0} [@Q] | |Constant{0} [@R] |Subtensor{:int64:} [@S] '' | |Subtensor{int64::} [@F] 'coefficients[0:]' | |ScalarFromTensor [@T] '' | |Elemwise{minimum,no_inplace} [@C] '' |Subtensor{:int64:} [@U] '' | |Subtensor{int64::} [@L] '' | |ScalarFromTensor [@V] '' | |Elemwise{minimum,no_inplace} [@C] '' |Elemwise{minimum,no_inplace} [@C] '' |A [@W] |k [@X] Inner graphs of the scan ops: for{cpu,scan_fn} [@B] '' >Elemwise{mul,no_inplace} [@Y] '' > |DimShuffle{x} [@Z] '' > | |coefficients[t] [@BA] -> [@S] > |Elemwise{pow,no_inplace} [@BB] '' > |Subtensor{int64} [@BC] '' > | |Subtensor{int64::} [@BD] '' > | | |for{cpu,scan_fn} [@BE] '' > | | | |k_copy [@BF] -> [@X] > | | | |IncSubtensor{Set;:int64:} [@BG] '' > | | | | |Alloc [@BH] '' > | | | | | |TensorConstant{0.0} [@BI] > | | | | | |Elemwise{add,no_inplace} [@BJ] '' > | | | | | | |k_copy [@BF] -> [@X] > | | | | | | |Subtensor{int64} [@BK] '' > | | | | | | |Shape [@BL] '' > | | | | | | | |Rebroadcast{0} [@BM] '' > | | | | | | | |DimShuffle{x,0} [@BN] '' > | | | | | | | |Elemwise{second,no_inplace} [@BO] '' > | | | | | | | |A_copy [@BP] -> [@W] > | | | | | | | |DimShuffle{x} [@BQ] '' > | | | | | | | |TensorConstant{1.0} [@BR] > | | | | | | |Constant{0} [@BS] > | | | | | |Subtensor{int64} [@BT] '' > | | | | | |Shape [@BU] '' > | | | | | | |Rebroadcast{0} [@BM] '' > | | | | | |Constant{1} [@BV] > | | | | |Rebroadcast{0} [@BM] '' > | | | | |ScalarFromTensor [@BW] '' > | | | | |Subtensor{int64} [@BK] '' > | | | |A_copy [@BP] -> [@W] > | | |Constant{1} [@BX] > | |Constant{-1} [@BY] > |DimShuffle{x} [@BZ] '' > |<TensorType(int8, scalar)> [@CA] -> [@U] for{cpu,scan_fn} [@BE] '' >Elemwise{mul,no_inplace} [@CB] '' > |<TensorType(float64, vector)> [@CC] -> [@BG] > |A_copy [@CD] -> [@BP]""" for truth, out in zip(expected_output.split("\n"), lines): assert truth.strip() == out.strip()
import theano.tensor as tt import theano.sparse as ts from theano.ifelse import ifelse from tqdm import tqdm import time import warnings # Config starry.config.lazy = False starry.config.quiet = True warnings.simplefilter("ignore") HUGE = 1e30 np.random.seed(1234) # Theano dummy variables _y = tt.dvector() _x = tt.dvector() _xs = tt.dvector() _ys = tt.dvector() _zs = tt.dvector() _xo = tt.dvector() _yo = tt.dvector() _ro = tt.dscalar() class Compare(object): """Compare different ways of evaluating the flux.""" def __init__(self, y): self.y = y @property
def test_scan_debugprint3(): coefficients = theano.tensor.dvector("coefficients") max_coefficients_supported = 10 k = tensor.iscalar("k") A = tensor.dvector("A") # compute A**k def compute_A_k(A, k): # Symbolic description of the result result, updates = theano.scan( fn=lambda prior_result, A: prior_result * A, outputs_info=tensor.ones_like(A), non_sequences=A, n_steps=k, ) A_k = result[-1] return A_k # Generate the components of the polynomial components, updates = theano.scan( fn=lambda coefficient, power, some_A, some_k: coefficient * (compute_A_k(some_A, some_k) ** power), outputs_info=None, sequences=[coefficients, theano.tensor.arange(max_coefficients_supported)], non_sequences=[A, k], ) # Sum them up polynomial = components.sum() final_result = polynomial output_str = theano.printing.debugprint(final_result, file="str") lines = output_str.split("\n") expected_output = """Sum{acc_dtype=float64} [id A] '' |for{cpu,scan_fn} [id B] '' |Elemwise{minimum,no_inplace} [id C] '' | |Subtensor{int64} [id D] '' | | |Shape [id E] '' | | | |Subtensor{int64::} [id F] 'coefficients[0:]' | | | |coefficients [id G] | | | |Constant{0} [id H] | | |Constant{0} [id I] | |Subtensor{int64} [id J] '' | |Shape [id K] '' | | |Subtensor{int64::} [id L] '' | | |ARange{dtype='int64'} [id M] '' | | | |TensorConstant{0} [id N] | | | |TensorConstant{10} [id O] | | | |TensorConstant{1} [id P] | | |Constant{0} [id Q] | |Constant{0} [id R] |Subtensor{:int64:} [id S] '' | |Subtensor{int64::} [id F] 'coefficients[0:]' | |ScalarFromTensor [id T] '' | |Elemwise{minimum,no_inplace} [id C] '' |Subtensor{:int64:} [id U] '' | |Subtensor{int64::} [id L] '' | |ScalarFromTensor [id V] '' | |Elemwise{minimum,no_inplace} [id C] '' |Elemwise{minimum,no_inplace} [id C] '' |A [id W] |k [id X] Inner graphs of the scan ops: for{cpu,scan_fn} [id B] '' >Elemwise{mul,no_inplace} [id Y] '' > |InplaceDimShuffle{x} [id Z] '' > | |coefficients[t] [id BA] -> [id S] > |Elemwise{pow,no_inplace} [id BB] '' > |Subtensor{int64} [id BC] '' > | |Subtensor{int64::} [id BD] '' > | | |for{cpu,scan_fn} [id BE] '' > | | | |k_copy [id BF] -> [id X] > | | | |IncSubtensor{Set;:int64:} [id BG] '' > | | | | |AllocEmpty{dtype='float64'} [id BH] '' > | | | | | |Elemwise{add,no_inplace} [id BI] '' > | | | | | | |k_copy [id BF] -> [id X] > | | | | | | |Subtensor{int64} [id BJ] '' > | | | | | | |Shape [id BK] '' > | | | | | | | |Rebroadcast{0} [id BL] '' > | | | | | | | |InplaceDimShuffle{x,0} [id BM] '' > | | | | | | | |Elemwise{second,no_inplace} [id BN] '' > | | | | | | | |A_copy [id BO] -> [id W] > | | | | | | | |InplaceDimShuffle{x} [id BP] '' > | | | | | | | |TensorConstant{1.0} [id BQ] > | | | | | | |Constant{0} [id BR] > | | | | | |Subtensor{int64} [id BS] '' > | | | | | |Shape [id BT] '' > | | | | | | |Rebroadcast{0} [id BL] '' > | | | | | |Constant{1} [id BU] > | | | | |Rebroadcast{0} [id BL] '' > | | | | |ScalarFromTensor [id BV] '' > | | | | |Subtensor{int64} [id BJ] '' > | | | |A_copy [id BO] -> [id W] > | | |Constant{1} [id BW] > | |Constant{-1} [id BX] > |InplaceDimShuffle{x} [id BY] '' > |<TensorType(int64, scalar)> [id BZ] -> [id U] for{cpu,scan_fn} [id BE] '' >Elemwise{mul,no_inplace} [id CA] '' > |<TensorType(float64, vector)> [id CB] -> [id BG] > |A_copy [id CC] -> [id BO]""" for truth, out in zip(expected_output.split("\n"), lines): assert truth.strip() == out.strip()
def kmLossFunction(self, vMax, rnaConc, kDeg, isEndoRnase, alpha): ''' Generates the functions used for estimating the per-RNA affinities (Michaelis-Menten constants) to the endoRNAses. The optimization problem is formulated as a multidimensional root-finding problem; the goal is to find a set of Michaelis-Menten constants such that the endoRNAse-mediated degradation under basal concentrations is consistent with the experimentally observed half-lives, thus (nonlinear rate) = (linear rate) where the nonlinear rate is the rate as predicted from some kinetic rate law, and the linear rate is proportional to the inverse of the observed half-life. Then, reordering, 0 = (nonlinear rate) - (linear rate) is (for the moment) the root we wish to find, for each RNA species, giving us the multidimensional function R_aux = (nonlinear rate) - (linear rate) This is the unnormalized residual function; the normalized residuals are R = (nonlinear rate)/(linear rate) - 1 In addition to matching our half-lives we also desire the Michaelis-Menten constants to be non-negative (negative values have no physical meaning). Thus we introduce a penalty term for negative values. TODO (John): explain penalty term The two terms (the residuals R and the negative value penalty Rneg) are combined into one 'loss' function L (alpha is the weighting on the negative value penalty): L = ln((exp(R) + exp(alpha*Rneg))/2) = ln(exp(R) + exp(alpha*Rneg)) - ln(2) The loss function has one element for each RNA. This functional form is a soft (continuous and differentiable) approximation to L = max(R, alpha*Rneg) The root finder, provided with L, will attempt to make each element of L as close to zero as possible, and therefore minimize both R and Rneg. The third-party package Theano is used to create the functions and find an analytic expression for the Jacobian. Parameters ---------- vMax: scalar The total endoRNAse capacity, in dimensions of amount per volume per time. rnaConc: 1-D array, float Concentrations of RNAs (that will be degraded), in dimensions of amount per volume. kDeg: 1-D array, float Experimentally observed degradation rates (computed from half-lives), in dimensions of per unit time. isEndoRnase: 1-D array, bool A vector that is True everywhere that an RNA corresponds to an endoRNAse; that is, an endoRNAse (or endoRNAse subunit) mRNA. alpha: scalar, >0 Regularization weight, used to penalize for negative Michaelis-Menten value predictions during the course of the optimization. Typical value is 0.5. Returns ------- L: function The 'loss' function. Rneg: function The negative Michaelis-Menten constant penalty terms. R: function The residual error (deviation from steady-state). Lp: function The Jacobian of the loss function L with respect to the Michaelis-Menten constants. R_aux: function Unnormalized 'residual' function. L_aux: function Unnormalized 'loss' function. Lp_aux: function Jacobian of the unnormalized 'loss' function. Jacob: function Duplicate with Lp. Jacob_aux: function Duplicate with Lp_aux. Notes ----- The regularization term also includes a penalty for the endoRNAse residuals, as well as a fixed weighting (WFendoR = 0.1). TODO (John): Why is this needed? It seems redundant. TODO (John): How do we know this weight is sufficient? All of the outputs are Theano functions, and take a 1-D array of Michaelis-Menten constants as their sole inputs. All of the functions return a 1-D array, with the exception of the Jacobians, which return matrices. TODO (John): Remove the redundant outputs. 
TODO (John): Look into removing Theano, since it is no longer maintained. We could use another package with similar functionality (analytic differentiation on algebraic functions), or replace the Theano operations with hand-computed solutions (difficult, as the Jacobian is probably very complicated). TODO (John): Consider redesigning this as an objective minimization problem rather than a root finding problem. TODO (John): Consider replacing the Michaelis-Menten constants with logarithmic equivalents, thereby eliminating the requirement for the negative value penalty. TODO (John): Consider moving this method out of this class, as it is, in fact, a static method, and isn't utilized anywhere within this class. ''' N = rnaConc.size km = T.dvector() # Residuals of non-linear optimization residual = (vMax / km / kDeg) / (1 + (rnaConc / km).sum()) - np.ones(N) residual_aux = (vMax * rnaConc / km) / (1 + (rnaConc / km).sum()) - ( kDeg * rnaConc) # Counting negative Km's (first regularization term) regularizationNegativeNumbers = (np.ones(N) - km / np.abs(km)).sum() / N # Penalties for EndoR Km's, which might be potentially nonf-fitted regularizationEndoR = (isEndoRnase * np.abs(residual)).sum() # Multi objective-based regularization WFendoR = 0.1 # weighting factor to protect Km optimized of EndoRNases regularization = regularizationNegativeNumbers + (WFendoR * regularizationEndoR) # Loss function LossFunction = T.log(T.exp(residual) + T.exp(alpha * regularization)) - T.log(2) LossFunction_aux = T.log( T.exp(residual_aux) + T.exp(alpha * regularization)) - T.log(2) J = theano.gradient.jacobian(LossFunction, km) J_aux = theano.gradient.jacobian(LossFunction_aux, km) Jacob = theano.function([km], J) Jacob_aux = theano.function([km], J_aux) L = theano.function([km], LossFunction) L_aux = theano.function([km], LossFunction_aux) Rneg = theano.function([km], regularizationNegativeNumbers) R = theano.function([km], residual) Lp = theano.function([km], J) Lp_aux = theano.function([km], J_aux) R_aux = theano.function([km], residual_aux) return L, Rneg, R, Lp, R_aux, L_aux, Lp_aux, Jacob, Jacob_aux
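# Minimal NumPy sketch (illustrative values, not from the source) of the soft
# maximum used above: L = ln(exp(R) + exp(alpha*Rneg)) - ln(2) approaches
# max(R, alpha*Rneg) when the two terms are well separated.
import numpy as np

R_demo = np.array([-3.0, 0.0, 2.0])
Rneg_demo = 1.5
alpha_demo = 0.5
L_soft = np.log(np.exp(R_demo) + np.exp(alpha_demo * Rneg_demo)) - np.log(2)
L_hard = np.maximum(R_demo, alpha_demo * Rneg_demo)
print(L_soft)   # smooth approximation
print(L_hard)   # hard maximum it approximates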
class GaussianLikelihoodModel(LikelihoodModel): def __init__(self, **parameters): super(GaussianLikelihoodModel, self).__init__(**parameters) self.sigma0inv = np.linalg.inv(self.sigma0) self.D = self.sigma.shape[0] self.compile() def transition_probability(self, parent, child): child_latent, child_time = child.get_state( 'latent_value'), child.get_state('time') if parent is None: return self.calculate_transition(child_latent, self.mu0, child_time, -1) parent_latent, parent_time = parent.get_state( 'latent_value'), parent.get_state('time') assert parent_time < child_time, (parent_time, child_time) return self.calculate_transition(child_latent, parent_latent, child_time, parent_time) @theanify(T.dvector('state'), T.dvector('parent'), T.dscalar('time'), T.dscalar('parent_time')) def calculate_transition(self, state, parent, time, parent_time): sigma = (time - parent_time) * self.sigma mu = parent logdet = T.log(T.nlinalg.det(sigma)) delta = state - mu pre = -(self.D / 2.0 * np.log(2 * np.pi) + 1 / 2.0 * logdet) return pre + -0.5 * (T.dot( delta, T.dot(T.nlinalg.matrix_inverse(sigma), delta))) @theanify(T.dvector('mean'), T.dmatrix('cov')) def sample(self, mean, cov): e, v = T.nlinalg.eigh(cov) x = RandomStreams().normal(size=(self.D, )) x = T.dot(x, T.sqrt(e)[:, None] * v) return x + mean def sample_transition(self, node, parent): children = node.children time = node.get_state('time') if parent is None: mu0 = self.mu0 sigma0 = self.sigma0 sigma0inv = self.sigma0inv else: mu0 = parent.get_state('latent_value') sigma0 = self.sigma * (time - parent.get_state('time')) sigma0inv = np.linalg.inv(sigma0) mus = [c.get_state('latent_value') for c in children] sigmas = [self.sigma * (c.get_state('time') - time) for c in children] sigmas_inv = [np.linalg.inv(s) for s in sigmas] sigman = np.linalg.inv(sigma0inv + sum(sigmas_inv)) mun = np.dot( sigman, np.dot(sigma0inv, mu0) + sum([np.dot(a, b) for a, b in zip(sigmas_inv, mus)])) return self.sample(mun, sigman) def get_parameters(self): return {"sigma", "sigma0", "mu0"}
def test_infer_shape(self):
    np.random.seed(42)
    args = [tt.dvector() for i in range(7)]
    vals = [np.random.rand(50) for i in range(7)]
    self._compile_and_check(args, self.op(*args), vals, self.op_class)
def test_pydotprint_return_image():
    x = tensor.dvector()
    ret = theano.printing.pydotprint(x * 2, return_image=True)
    assert isinstance(ret, (str, bytes))
def test_convolution(self): # print '\n\n*************************************************' # print ' TEST CONVOLUTION' # print '*************************************************' # fixed parameters bsize = 10 # batch size imshp = (28, 28) kshp = (5, 5) nkern = 5 ssizes = ((1, 1), (2, 2), (3, 3), (4, 4)) convmodes = ('full', 'valid') # symbolic stuff bias = tensor.dvector() kerns = tensor.dmatrix() input = tensor.dmatrix() rng = numpy.random.RandomState(3423489) filters = rng.randn(nkern, numpy.prod(kshp)) biasvals = rng.randn(nkern) for mode in ('FAST_COMPILE', 'FAST_RUN'): # , profmode): ttot, ntot = 0, 0 for conv_mode in convmodes: for ss in ssizes: output, outshp = sp.convolve(kerns, kshp, nkern, input,\ imshp, ss, bias=bias, mode=conv_mode) f = function([kerns, bias, input], output, mode=mode) # now test with real values img2d = numpy.arange(bsize * numpy.prod(imshp)).reshape(( \ bsize,) + imshp) img1d = img2d.reshape(bsize, -1) # create filters (need to be flipped to use convolve2d) filtersflipped = numpy.zeros((nkern, ) + kshp) for k in range(nkern): it = reversed(filters[k, :]) for i in range(kshp[0]): for j in range(kshp[1]): filtersflipped[k, i, j] = it.next() # compute output with convolve2d if conv_mode == 'valid': fulloutshp = numpy.array(imshp) - numpy.array(kshp) + 1 else: fulloutshp = numpy.array(imshp) + numpy.array(kshp) - 1 ntime1 = time.time() refout = numpy.zeros((bsize, ) + tuple(fulloutshp) + (nkern, )) for b in range(bsize): for n in range(nkern): refout[b, ..., n] = convolve2d(img2d[b, :, :], filtersflipped[n, ...], conv_mode) ntot += time.time() - ntime1 # need to flatten images bench1 = refout[:, 0::ss[0], 0::ss[1], :].reshape(bsize, -1, nkern) bench1 += biasvals.reshape(1, 1, nkern) # swap the last two dimensions (output needs to be nkern x outshp) bench1 = numpy.swapaxes(bench1, 1, 2) ttime1 = time.time() out1 = f(filters, biasvals, img1d) ttot += time.time() - ttime1 temp = bench1.flatten() - out1.flatten() assert (temp < 1e-5).all()
def test_scan_debugprint5(): k = tensor.iscalar("k") A = tensor.dvector("A") # Symbolic description of the result result, updates = theano.scan(fn=lambda prior_result, A: prior_result * A, outputs_info=tensor.ones_like(A), non_sequences=A, n_steps=k) final_result = tensor.grad(result[-1].sum(), A) output_str = theano.printing.debugprint(final_result, file='str') lines = [] for line in output_str.split('\n'): lines += [line] expected_output = """Subtensor{int64} [@A] '' |for{cpu,grad_of_scan_fn}.1 [@B] '' | |Elemwise{sub,no_inplace} [@C] '' | | |Subtensor{int64} [@D] '' | | | |Shape [@E] '' | | | | |for{cpu,scan_fn} [@F] '' | | | | |k [@G] | | | | |IncSubtensor{Set;:int64:} [@H] '' | | | | | |Alloc [@I] '' | | | | | | |TensorConstant{0.0} [@J] | | | | | | |Elemwise{add,no_inplace} [@K] '' | | | | | | | |k [@G] | | | | | | | |Subtensor{int64} [@L] '' | | | | | | | |Shape [@M] '' | | | | | | | | |Rebroadcast{0} [@N] '' | | | | | | | | |DimShuffle{x,0} [@O] '' | | | | | | | | |Elemwise{second,no_inplace} [@P] '' | | | | | | | | |A [@Q] | | | | | | | | |DimShuffle{x} [@R] '' | | | | | | | | |TensorConstant{1.0} [@S] | | | | | | | |Constant{0} [@T] | | | | | | |Subtensor{int64} [@U] '' | | | | | | |Shape [@V] '' | | | | | | | |Rebroadcast{0} [@N] '' | | | | | | |Constant{1} [@W] | | | | | |Rebroadcast{0} [@N] '' | | | | | |ScalarFromTensor [@X] '' | | | | | |Subtensor{int64} [@L] '' | | | | |A [@Q] | | | |Constant{0} [@Y] | | |TensorConstant{1} [@Z] | |Subtensor{:int64:} [@BA] '' | | |Subtensor{::int64} [@BB] '' | | | |Subtensor{:int64:} [@BC] '' | | | | |for{cpu,scan_fn} [@F] '' | | | | |Constant{-1} [@BD] | | | |Constant{-1} [@BE] | | |ScalarFromTensor [@BF] '' | | |Elemwise{sub,no_inplace} [@C] '' | |Subtensor{:int64:} [@BG] '' | | |Subtensor{:int64:} [@BH] '' | | | |Subtensor{::int64} [@BI] '' | | | | |for{cpu,scan_fn} [@F] '' | | | | |Constant{-1} [@BJ] | | | |Constant{-1} [@BK] | | |ScalarFromTensor [@BL] '' | | |Elemwise{sub,no_inplace} [@C] '' | |Subtensor{::int64} [@BM] '' | | |IncSubtensor{Inc;int64::} [@BN] '' | | | |Elemwise{second,no_inplace} [@BO] '' | | | | |for{cpu,scan_fn} [@BP] '' | | | | | |k [@G] | | | | | |IncSubtensor{Set;:int64:} [@H] '' | | | | | |A [@Q] | | | | |DimShuffle{x,x} [@BQ] '' | | | | |TensorConstant{0.0} [@J] | | | |IncSubtensor{Inc;int64} [@BR] '' | | | | |Elemwise{second,no_inplace} [@BS] '' | | | | | |Subtensor{int64::} [@BT] '' | | | | | | |for{cpu,scan_fn} [@BP] '' | | | | | | |Constant{1} [@BU] | | | | | |DimShuffle{x,x} [@BV] '' | | | | | |TensorConstant{0.0} [@J] | | | | |Elemwise{second} [@BW] '' | | | | | |Subtensor{int64} [@BX] '' | | | | | | |Subtensor{int64::} [@BT] '' | | | | | | |Constant{-1} [@BY] | | | | | |DimShuffle{x} [@BZ] '' | | | | | |Elemwise{second,no_inplace} [@CA] '' | | | | | |Sum{acc_dtype=float64} [@CB] '' | | | | | | |Subtensor{int64} [@BX] '' | | | | | |TensorConstant{1.0} [@S] | | | | |Constant{-1} [@BY] | | | |Constant{1} [@BU] | | |Constant{-1} [@CC] | |Alloc [@CD] '' | | |TensorConstant{0.0} [@J] | | |Elemwise{add,no_inplace} [@CE] '' | | | |Elemwise{sub,no_inplace} [@C] '' | | | |TensorConstant{1} [@Z] | | |Subtensor{int64} [@CF] '' | | |Shape [@CG] '' | | | |A [@Q] | | |Constant{0} [@CH] | |A [@Q] |Constant{-1} [@CI] Inner graphs of the scan ops: for{cpu,grad_of_scan_fn}.1 [@B] '' >Elemwise{add,no_inplace} [@CJ] '' > |Elemwise{mul} [@CK] '' > | |<TensorType(float64, vector)> [@CL] -> [@BM] > | |A_copy [@CM] -> [@Q] > |<TensorType(float64, vector)> [@CN] -> [@BM] >Elemwise{add,no_inplace} [@CO] '' > |Elemwise{mul} [@CP] '' 
> | |<TensorType(float64, vector)> [@CL] -> [@BM] > | |<TensorType(float64, vector)> [@CQ] -> [@BA] > |<TensorType(float64, vector)> [@CR] -> [@CD] for{cpu,scan_fn} [@F] '' >Elemwise{mul,no_inplace} [@CS] '' > |<TensorType(float64, vector)> [@CT] -> [@H] > |A_copy [@CU] -> [@Q] for{cpu,scan_fn} [@F] '' >Elemwise{mul,no_inplace} [@CS] '' for{cpu,scan_fn} [@F] '' >Elemwise{mul,no_inplace} [@CS] '' for{cpu,scan_fn} [@BP] '' >Elemwise{mul,no_inplace} [@CS] '' for{cpu,scan_fn} [@BP] '' >Elemwise{mul,no_inplace} [@CS] ''""" for truth, out in zip(expected_output.split("\n"), lines): assert truth.strip() == out.strip()
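For reference, the graph whose debugprint the test above checks is the standard scan "power" example; a standalone sketch (not part of the test) that builds and runs the same computation:

import numpy as np
import theano
from theano import tensor

k = tensor.iscalar("k")
A = tensor.dvector("A")
# repeated elementwise multiplication: after k steps the last row of `result` is A**k
result, updates = theano.scan(fn=lambda prior_result, A: prior_result * A,
                              outputs_info=tensor.ones_like(A),
                              non_sequences=A, n_steps=k)
power = theano.function(inputs=[A, k], outputs=result[-1], updates=updates)
print power(np.array([1.0, 2.0, 3.0]), 3)   # -> [  1.   8.  27.]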
def build_custom_ann(self, layer_list, ann_type="rlu", nb=784): ''' Build a fully connected feed-forward net: nb inputs, hidden layers of the sizes given in layer_list, a 10-unit rectifier output layer, and the activation named by ann_type ("rlu", "sigmoid" or "ht"); compiles self.trainer and self.predictor. ''' layer_list = [nb] + layer_list input = T.dvector('input') target = T.wvector('target') w_list = [] x_list = [] w_list.append( theano.shared( np.random.uniform(low=-.1, high=.1, size=(layer_list[0], layer_list[1])))) if ann_type == "rlu": x_list.append( T.switch( T.dot(input, w_list[0]) > 0, T.dot(input, w_list[0]), 0)) elif ann_type == "sigmoid": x_list.append(Tann.sigmoid(T.dot(input, w_list[0]))) elif ann_type == "ht": x_list.append(T.tanh(T.dot(input, w_list[0]))) for count in range(0, len(layer_list) - 2): w_list.append( theano.shared( np.random.uniform(low=-.1, high=.1, size=(layer_list[count + 1], layer_list[count + 2])))) if ann_type == "rlu": x_list.append( T.switch( T.dot(x_list[count], w_list[count + 1]) > 0, T.dot(x_list[count], w_list[count + 1]), 0)) elif ann_type == "sigmoid": x_list.append( Tann.sigmoid(T.dot(x_list[count], w_list[count + 1]))) elif ann_type == "ht": x_list.append(T.tanh(T.dot(x_list[count], w_list[count + 1]))) w_list.append( theano.shared( np.random.uniform(low=-.1, high=.1, size=(layer_list[-1], 10)))) x_list.append( T.switch( T.dot(x_list[-1], w_list[-1]) > 0, T.dot(x_list[-1], w_list[-1]), 0)) error = T.sum(pow((target - x_list[-1]), 2)) params = w_list gradients = T.grad(error, params) backprops = [(p, p - self.lrate * g) for p, g in zip(params, gradients)] self.trainer = theano.function(inputs=[input, target], outputs=error, updates=backprops, allow_input_downcast=True) self.predictor = theano.function(inputs=[input], outputs=x_list[-1], allow_input_downcast=True)
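A hypothetical call sketch (the surrounding class, its self.lrate attribute, and the data names are assumptions, not from the original): build a 784-128-64-10 rectifier network and take one training step on a flattened image.

# hypothetical usage; `net` is an instance of the surrounding class with net.lrate set
net.build_custom_ann([128, 64], ann_type="rlu", nb=784)
err = net.trainer(image_vector, one_hot_label)   # one gradient step, returns the squared error
scores = net.predictor(image_vector)             # activations of the 10 output units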
import theano import theano.tensor as T from adio import * Ops = { 'exp': T.exp, 'log': T.log, 'grad': (lambda y, x: T.grad(y, x, disconnected_inputs='ignore')), 'new_scalar': T.dscalar, 'new_vector': (lambda _: T.dvector()), 'function': theano.function }
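adio is imported above but its interface is not shown; independent of it, a small check that the entries of this table compose as expected, building d/dx log(exp(x)) through the mapped ops:

x = Ops['new_scalar']('x')
y = Ops['log'](Ops['exp'](x))
dy_dx = Ops['grad'](y, x)          # T.grad with disconnected inputs ignored
f = Ops['function']([x], dy_dx)
print f(3.0)                       # -> 1.0, since d/dx log(exp(x)) = 1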
import theano import theano.tensor as T import numpy as np a = T.dvector() b = T.nnet.softmax(a)[0] myf = theano.function([a], b) ok1 = myf(np.arange(10)) print ok1
def __init__(self, dt, rng, d=25, C=5, V=None, W=None, b=None, L=None, Ws=None, activation=T.tanh): self.d = d self.C = C self.vsize = len(dt.vocab_list) print 'The provided vocabulary has %i words' % self.vsize self.dt = dt n_in = 1000 # use this as an order of magnitude for now if W is None: W_values = numpy.asarray(rng.uniform(low=-numpy.sqrt(1.0 / n_in), high=numpy.sqrt(1.0 / n_in), size=(d, 2 * d)), dtype=theano.config.floatX) W = theano.shared(value=W_values, name='W', borrow=True) # possible bias if b is None: b_values = numpy.zeros((d, ), dtype=theano.config.floatX) b = theano.shared(value=b_values, name='b', borrow=True) # r parameter for initial word vector matrix # taken from paper r = 0.0001 if L is None: L_values = numpy.asarray(rng.uniform(low=-r, high=r, size=(d, self.vsize)), dtype=theano.config.floatX) L = theano.shared(value=L_values, name='L', borrow=True) if Ws is None: Ws_values = numpy.asarray(rng.uniform(low=-r, high=r, size=(C, d)), dtype=theano.config.floatX) Ws = theano.shared(value=Ws_values, name='Ws', borrow=True) # rearrange indices so V in R^(d*2d*2d) if V is None: V_values = numpy.asarray(rng.uniform( low=-numpy.sqrt(1.0 / (n_in)**2), high=numpy.sqrt(1.0 / (n_in)**2), size=(d, 2 * d, 2 * d)), dtype=theano.config.floatX) V = theano.shared(value=V_values, name='V', borrow=True) self.W = W self.b = b self.L = L self.Ws = Ws self.V = V # list of parameters self.theta = [self.W, self.b, self.L, self.Ws, self.V] self.lambda_const = 0.01 self.param_error = self.lambda_const * ((self.V**2).sum() + \ (self.W**2).sum() + \ (self.Ws**2).sum() + \ (self.L**2).sum() + \ (self.b**2).sum()) ym = T.dmatrix('ym') tm = T.dmatrix('tm') self.cross_entropy = theano.function([ym, tm], (tm * T.log(ym)).sum() * -1.0) v1 = T.dvector('v1') v2 = T.dvector('v2') # direct sum v = T.concatenate([v1, v2]) pair_output = T.dot(T.dot(self.V, v), v) + T.dot(self.W, v) + b self.pair_map = theano.function([v1, v2], pair_output) self.softmax = theano.function([v1], softmax(T.dot(Ws, v1)).flatten())
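A hypothetical use of the compiled helpers above (the instance name rntn and the choice of vocabulary indices are assumptions): compose two word vectors and score the composed node.

# hypothetical usage; `rntn` is an instance of the class above
left = rntn.L.get_value()[:, 0]      # d-dimensional vector of the first vocabulary word
right = rntn.L.get_value()[:, 1]
parent = rntn.pair_map(left, right)  # composed d-dimensional node vector
class_probs = rntn.softmax(parent)   # C class probabilities for the composed node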
import theano import theano.tensor as T from theano import function def jacobian(fn, args): J, updates = theano.scan(lambda i, fn, args: T.grad(fn[i], args), sequences=T.arange(fn.shape[0]), non_sequences=[fn, args]) f = function([args], J, updates=updates) return f def jacobian_times_vector(fn, args, vector, weights): J = T.Rop(fn, weights, vector) f = function([weights, vector, fn], J) return f if __name__ == "__main__": x = T.dvector("x") y = 1 / (1 + T.exp(-x)) f = jacobian(y, x) print f([4, 4])
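Since the logistic map is applied elementwise, its Jacobian is diagonal with entries s(x_i)(1 - s(x_i)); a quick numpy check of the value printed above:

import numpy as np
s = 1.0 / (1.0 + np.exp(-np.array([4.0, 4.0])))
print np.diag(s * (1 - s))   # matches f([4, 4]) above: roughly 0.01766 on the diagonal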
L = 0 R = 0 if __name__ == "__main__": #x = numpy.load('b_train.npy').astype(my_dtype) #x_corrupt = SaltPepperCorrupt(x) data = PatchSource('test_images/lena.png', (6,6) , 'test_images/1.bmp') dimensions = [ data.length, 5*data.length, 5*data.length ] sda = SDA(dimensions) #sda = SDA.load('sda_line') Pretrain(sda, data, 2, 0.004) FineTuning(sda, data, 2, 0.0005) input = T.dvector() f = theano.function( inputs = [input], outputs = sda.goBack(sda.goThrough(input,0,sda.n_layers), sda.n_layers, 0) ) m = recover(data, f) plt.subplot(131) plt.imshow(m, cmap=matplotlib.cm.gray) plt.subplot(132) plt.imshow(data.m, cmap=matplotlib.cm.gray) plt.subplot(133) plt.imshow(data.m_corrupt, cmap=matplotlib.cm.gray) plt.show() print PSNR(m, data.m) p = data.nextPatch(numpy.random.randint(data.max)) plt.subplot(131) plt.imshow(numpy.reshape(f(p[1]).tolist(), data.patch_shape), cmap=matplotlib.cm.gray)
def simulate_symbolic_online_RL_algorithm(mdp, num_episodes, max_iterations): real_actions = mdp.actions(None) actions = np.arange(len(real_actions)) # these theano variables are used to define the symbolic input of the network features = T.dvector('features') action = T.lscalar('action') reward = T.dscalar('reward') next_features = T.dvector('next_features') learning_rate_symbol = T.dscalar('learning_rate') h1 = HiddenLayer(n_vis=INPUT_DIM, n_hid=HIDDEN_DIM, layer_name='h1') h2 = HiddenLayer(n_vis=HIDDEN_DIM, n_hid=HIDDEN_DIM, layer_name='h2') h3 = HiddenLayer(n_vis=HIDDEN_DIM, n_hid=HIDDEN_DIM, layer_name='h3') h4 = HiddenLayer(n_vis=HIDDEN_DIM, n_hid=OUTPUT_DIM, layer_name='h4') # h5 = HiddenLayer(n_vis=HIDDEN_DIM, n_hid=HIDDEN_DIM, layer_name='h5') # h6 = HiddenLayer(n_vis=HIDDEN_DIM, n_hid=OUTPUT_DIM, layer_name='h6') layers = [h1, h2, h3, h4] #, h3, h4, h5, h6] learning_rate = 1e-2 explorationProb = .4 regularization_weight = 1e-5 momentum_rate = 9e-1 qnetwork = QNetwork(layers, discount=mdp.discount, momentum_rate=momentum_rate, regularization_weight=regularization_weight) exploration_reduction = (explorationProb - MIN_EXPLORATION_PROB) / num_episodes learning_rate_reduction = (learning_rate - MIN_LEARNING_RATE) / num_episodes # this call gets the symbolic output of the network along with the parameter updates loss, updates = qnetwork.get_loss_and_updates(features, action, reward, next_features, learning_rate_symbol) print 'Building Training Function...' # this defines the theano symbolic function used to train the network # 1st argument is a list of inputs, here the symbolic variables above # 2nd argument is the symbolic output expected # 3rd argument is the dictionary of parameter updates # 4th argument is the compilation mode train_model = theano.function([ theano.Param(features, default=np.zeros(INPUT_DIM)), theano.Param(action, default=0), theano.Param(reward, default=0), theano.Param(next_features, default=np.zeros(HIDDEN_DIM)), learning_rate_symbol ], outputs=loss, updates=updates, mode='FAST_RUN') get_action = theano.function([features], qnetwork.get_action(features)) total_rewards = [] total_losses = [] weight_magnitudes = [] print 'Starting Training...' 
replay_mem = replay_memory.ReplayMemory() for episode in xrange(num_episodes): state = np.array(mdp.start_state) total_reward = 0 total_loss = 0 for iteration in xrange(max_iterations): if random.random() < explorationProb: action = random.choice(actions) else: action = get_action(state) real_action = real_actions[action] transitions = mdp.succAndProbReward(state, real_action) if len(transitions) == 0: # loss += train_model(state, action, 0, next_features) break # Choose a random transition i = sample([prob for newState, prob, reward in transitions]) newState, prob, reward = transitions[i] newState = np.array(newState) sars_tuple = (state, action, np.clip(reward, -1, 1), newState) replay_mem.store(sars_tuple) num_samples = 5 if replay_mem.isFull() else 1 for i in range(0, num_samples): random_train_tuple = replay_mem.sample() sample_state = random_train_tuple[0] sample_action = random_train_tuple[1] sample_reward = random_train_tuple[2] sample_new_state = random_train_tuple[3] total_loss += train_model(sample_state, sample_action, sample_reward, sample_new_state, learning_rate) total_reward += reward state = newState explorationProb -= exploration_reduction learning_rate -= learning_rate_reduction total_rewards.append(total_reward * mdp.discount**iteration) total_losses.append(total_loss) weight_magnitude = qnetwork.get_weight_magnitude() weight_magnitudes.append(weight_magnitude) print 'episode: {}\t\t loss: {}\t\t reward: {}\t\tweight magnitude: {}'.format( episode, round(total_loss, 2), total_reward, weight_magnitude) # return the list of rewards attained return total_rewards, total_losses
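The replay_memory module itself is not shown; as a rough sketch of the interface used above (store, isFull, sample on (state, action, reward, next_state) tuples), assuming a simple bounded buffer:

import random

class ReplayMemory(object):
    """Sketch of the assumed interface of replay_memory.ReplayMemory, not the original code."""
    def __init__(self, capacity=10000):
        self.capacity = capacity
        self.memory = []

    def store(self, sars_tuple):
        # drop the oldest transition once the buffer is full
        if len(self.memory) >= self.capacity:
            self.memory.pop(0)
        self.memory.append(sars_tuple)

    def isFull(self):
        return len(self.memory) >= self.capacity

    def sample(self):
        # uniform sampling over stored transitions
        return random.choice(self.memory)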
def test_fail(self): """ Test that conv2d fails for dimensions other than 2 or 3. """ self.assertRaises(Exception, conv.conv2d, T.dtensor4(), T.dtensor3()) self.assertRaises(Exception, conv.conv2d, T.dtensor3(), T.dvector())
def get_expr_rff_feature_map_component_grad(x, omega, u): expr = get_expr_rff_feature_map_component(x, omega, u) return T.grad(expr, x) def get_expr_rff_feature_map_component_hessian(x, omega, u): expr = get_expr_rff_feature_map_component(x, omega, u) return T.hessian(expr, x) def get_expr_rff_feature_map_component_third_order_tensor(x, omega, u): grad = get_expr_rff_feature_map_component_grad(x, omega, u) G3, updates = theano.scan(lambda i, grad, x: T.hessian(grad[i], x), sequences=T.arange(grad.shape[0]), non_sequences=[grad, x]) return G3, updates # theano variables x = T.dvector('x') y = T.dvector('y') sigma = T.dscalar('sigma') # compile function handles gaussian_kernel_theano = function(inputs=[x, y, sigma], outputs=get_expr_gaussian_kernel( x, y, sigma)) gaussian_kernel_grad_theano = function( inputs=[x, y, sigma], outputs=get_expr_gaussian_kernel_grad(x, y, sigma)) gaussian_kernel_hessian_theano = function( inputs=[x, y, sigma], outputs=get_expr_gaussian_kernel_hessian(x, y, sigma)) G3, updates = get_expr_gaussian_kernel_third_order_tensor(x, y, sigma)
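The get_expr_gaussian_kernel* builders are defined elsewhere in this module; purely for orientation, a plausible sketch (an assumption, not the original definition) of such a kernel expression and its gradient, using the symbolic variables just declared:

def get_expr_gaussian_kernel_sketch(x, y, sigma):
    # Gaussian/RBF kernel k(x, y) = exp(-||x - y||^2 / sigma); the exact scaling
    # used by the original get_expr_gaussian_kernel may differ.
    return T.exp(-T.sum((x - y) ** 2) / sigma)

kernel_sketch = function(inputs=[x, y, sigma],
                         outputs=get_expr_gaussian_kernel_sketch(x, y, sigma))
kernel_grad_sketch = function(inputs=[x, y, sigma],
                              outputs=T.grad(get_expr_gaussian_kernel_sketch(x, y, sigma), x))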
Y_train_rouge1 = Y_train[:,0] Y_train_rouge2 = Y_train[:,1] Y_train_rougesu4 = Y_train[:,2] Y_valid_rouge1 = Y_valid[:,0] Y_valid_rouge2 = Y_valid[:,1] Y_valid_rougesu4 = Y_valid[:,2] embed_matrix = np.load("data/word2vec/embed_matrix.npy") embed_matrix = np.array(embed_matrix,dtype=np.float64) print(embed_matrix.shape, embed_matrix.dtype) X = T.imatrix("X") Y = T.dvector("Y") minibatch_size = 100 sentence_length = X_train.shape[1] embsize = embed_matrix.shape[1] vocab_size = embed_matrix.shape[0] sentence_shape = (minibatch_size, 1, sentence_length, embsize) filter_shape = (20, 1, 5, embed_matrix.shape[1]) pool_size = (2,1) # index 0 may be wrong: it is the UNK word, which Yoon Kim sets to the zero vector project_layer = ProjectionLayer(rng,X,vocab_size,embsize,(minibatch_size,sentence_length),embed_matrix=embed_matrix) #conv_layer conv_layer = LenetConvPoolLayer(rng, project_layer.output,sentence_shape,filter_shape,pool_size)
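A quick shape check for these settings, assuming LenetConvPoolLayer performs a 'valid' convolution followed by max-pooling as in the standard LeNet tutorial layer:

# Each of the 20 filters spans the full embedding width, so the convolved width is 1;
# the convolved height is sentence_length - 5 + 1, then halved by the (2, 1) pooling.
conv_out_height = (sentence_length - filter_shape[2] + 1) // pool_size[0]
conv_out_shape = (minibatch_size, filter_shape[0], conv_out_height, 1)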
def test_scan_debugprint5(): k = tensor.iscalar("k") A = tensor.dvector("A") # Symbolic description of the result result, updates = theano.scan( fn=lambda prior_result, A: prior_result * A, outputs_info=tensor.ones_like(A), non_sequences=A, n_steps=k, ) final_result = tensor.grad(result[-1].sum(), A) output_str = theano.printing.debugprint(final_result, file="str") lines = output_str.split("\n") expected_output = """Subtensor{int64} [id A] '' |for{cpu,grad_of_scan_fn}.1 [id B] '' | |Elemwise{sub,no_inplace} [id C] '' | | |Subtensor{int64} [id D] '' | | | |Shape [id E] '' | | | | |for{cpu,scan_fn} [id F] '' | | | | |k [id G] | | | | |IncSubtensor{Set;:int64:} [id H] '' | | | | | |AllocEmpty{dtype='float64'} [id I] '' | | | | | | |Elemwise{add,no_inplace} [id J] '' | | | | | | | |k [id G] | | | | | | | |Subtensor{int64} [id K] '' | | | | | | | |Shape [id L] '' | | | | | | | | |Rebroadcast{0} [id M] '' | | | | | | | | |InplaceDimShuffle{x,0} [id N] '' | | | | | | | | |Elemwise{second,no_inplace} [id O] '' | | | | | | | | |A [id P] | | | | | | | | |InplaceDimShuffle{x} [id Q] '' | | | | | | | | |TensorConstant{1.0} [id R] | | | | | | | |Constant{0} [id S] | | | | | | |Subtensor{int64} [id T] '' | | | | | | |Shape [id U] '' | | | | | | | |Rebroadcast{0} [id M] '' | | | | | | |Constant{1} [id V] | | | | | |Rebroadcast{0} [id M] '' | | | | | |ScalarFromTensor [id W] '' | | | | | |Subtensor{int64} [id K] '' | | | | |A [id P] | | | |Constant{0} [id X] | | |TensorConstant{1} [id Y] | |Subtensor{:int64:} [id Z] '' | | |Subtensor{::int64} [id BA] '' | | | |Subtensor{:int64:} [id BB] '' | | | | |for{cpu,scan_fn} [id F] '' | | | | |Constant{-1} [id BC] | | | |Constant{-1} [id BD] | | |ScalarFromTensor [id BE] '' | | |Elemwise{sub,no_inplace} [id C] '' | |Subtensor{:int64:} [id BF] '' | | |Subtensor{:int64:} [id BG] '' | | | |Subtensor{::int64} [id BH] '' | | | | |for{cpu,scan_fn} [id F] '' | | | | |Constant{-1} [id BI] | | | |Constant{-1} [id BJ] | | |ScalarFromTensor [id BK] '' | | |Elemwise{sub,no_inplace} [id C] '' | |Subtensor{::int64} [id BL] '' | | |IncSubtensor{Inc;int64::} [id BM] '' | | | |Elemwise{second,no_inplace} [id BN] '' | | | | |for{cpu,scan_fn} [id F] '' | | | | |InplaceDimShuffle{x,x} [id BO] '' | | | | |TensorConstant{0.0} [id BP] | | | |IncSubtensor{Inc;int64} [id BQ] '' | | | | |Elemwise{second,no_inplace} [id BR] '' | | | | | |Subtensor{int64::} [id BS] '' | | | | | | |for{cpu,scan_fn} [id F] '' | | | | | | |Constant{1} [id BT] | | | | | |InplaceDimShuffle{x,x} [id BU] '' | | | | | |TensorConstant{0.0} [id BP] | | | | |Elemwise{second} [id BV] '' | | | | | |Subtensor{int64} [id BW] '' | | | | | | |Subtensor{int64::} [id BS] '' | | | | | | |Constant{-1} [id BX] | | | | | |InplaceDimShuffle{x} [id BY] '' | | | | | |Elemwise{second,no_inplace} [id BZ] '' | | | | | |Sum{acc_dtype=float64} [id CA] '' | | | | | | |Subtensor{int64} [id BW] '' | | | | | |TensorConstant{1.0} [id R] | | | | |Constant{-1} [id BX] | | | |Constant{1} [id BT] | | |Constant{-1} [id CB] | |Alloc [id CC] '' | | |TensorConstant{0.0} [id BP] | | |Elemwise{add,no_inplace} [id CD] '' | | | |Elemwise{sub,no_inplace} [id C] '' | | | |TensorConstant{1} [id Y] | | |Subtensor{int64} [id CE] '' | | |Shape [id CF] '' | | | |A [id P] | | |Constant{0} [id CG] | |A [id P] |Constant{-1} [id CH] Inner graphs of the scan ops: for{cpu,grad_of_scan_fn}.1 [id B] '' >Elemwise{add,no_inplace} [id CI] '' > |Elemwise{mul} [id CJ] '' > | |<TensorType(float64, vector)> [id CK] -> [id BL] > | |A_copy [id CL] -> [id P] > 
|<TensorType(float64, vector)> [id CM] -> [id BL] >Elemwise{add,no_inplace} [id CN] '' > |Elemwise{mul} [id CO] '' > | |<TensorType(float64, vector)> [id CK] -> [id BL] > | |<TensorType(float64, vector)> [id CP] -> [id Z] > |<TensorType(float64, vector)> [id CQ] -> [id CC] for{cpu,scan_fn} [id F] '' >Elemwise{mul,no_inplace} [id CR] '' > |<TensorType(float64, vector)> [id CP] -> [id H] > |A_copy [id CL] -> [id P] for{cpu,scan_fn} [id F] '' >Elemwise{mul,no_inplace} [id CR] '' for{cpu,scan_fn} [id F] '' >Elemwise{mul,no_inplace} [id CR] '' for{cpu,scan_fn} [id F] '' >Elemwise{mul,no_inplace} [id CR] '' for{cpu,scan_fn} [id F] '' >Elemwise{mul,no_inplace} [id CR] ''""" for truth, out in zip(expected_output.split("\n"), lines): assert truth.strip() == out.strip()
def setup_vars(self): super(SimpleRegressor, self).setup_vars() # the k variable holds the target output for input x. self.vars.k = T.dvector('k') self.inputs.append(self.vars.k)
import numpy import theano import theano.tensor as T examples = 1000 features = 100 hidden = 10 training_steps = 1000 #procedures def l2(v): return T.sum(v**2) #main D = (numpy.random.randn(examples, features), numpy.random.randint(size=examples, low=0, high=2)) x = T.dmatrix("x") y = T.dvector("y") w1 = theano.shared(numpy.random.randn(features, hidden), name="w1") b1 = theano.shared(numpy.zeros(hidden), name="b1") w2 = theano.shared(numpy.random.randn(hidden), name="w2") b2 = theano.shared(0., name="b2") p1 = T.tanh(T.dot(x, w1) + b1) # sigmoid output keeps p2 in (0, 1), as binary_crossentropy requires p2 = T.nnet.sigmoid(T.dot(p1, w2) + b2) prediction = p2 > 0.5 error = T.nnet.binary_crossentropy(p2, y) loss = error.mean() + 0.01 * (l2(w1) + l2(w2)) gw1, gb1, gw2, gb2 = T.grad(loss, [w1, b1, w2, b2]) train = theano.function(inputs=[x, y], outputs=[p2, error], updates=((w1, w1 - 0.1 * gw1), (b1, b1 - 0.1 * gb1), (w2, w2 - 0.1 * gw2), (b2, b2 - 0.1 * gb2))) predict = theano.function(inputs=[x], outputs=[prediction])
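D and training_steps are prepared above but the loop itself is not shown here; a minimal sketch of how training would typically proceed with these pieces:

# hypothetical training loop using the functions compiled above
for i in range(training_steps):
    pred, err = train(D[0], D[1])
print "final mean cross-entropy:", err.mean()
print "training accuracy:", (predict(D[0])[0].flatten() == D[1]).mean()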
''' This program fastmult.py explores operations for multiplying matrices of derivatives with vectors for performance gains. This avoids actually calculating the matrices of derivatives. ''' # pylint: disable = bad-whitespace, invalid-name, no-member, bad-continuation import theano import theano.tensor as T # First, let's clarify the Jacobian operation. W = T.dmatrix() x = T.dvector() y = T.dot(x,W) J1 = theano.gradient.jacobian(y,x) J2 = theano.gradient.jacobian(y,W) f1 = theano.function([x,W], J1) f2 = theano.function([x,W], J2) print f1([0,1],[[1,1],[1,1]]) print f2([0,1],[[1,1],[1,1]]) # The R-operator right-multiplies a Jacobian by a vector. W = T.dmatrix() V = T.dmatrix() x = T.dvector() ''' Note that many but not all ops