def custom_svrg2(loss, params, m, learning_rate=0.01, objective=None, data=None, target=None, getpred=None):
    theano.pp(loss)
    grads = theano.grad(loss, params)
    n = data.shape[0]
    updates = OrderedDict()
    rng = T.shared_randomstreams.RandomStreams(seed=149)

    for param, grad in zip(params, grads):
        value = param.get_value(borrow=True)
        mu = grad / n

        def oneStep(w):
            t = rng.choice(size=(1,), a=n)
            loss_part_tilde = objective(getpred(data[t], param), target[t])
            loss_part_tilde = loss_part_tilde.mean()
            g_tilde = theano.grad(loss_part_tilde, param)
            loss_part = objective(getpred(data[t], w), target[t])
            loss_part = loss_part.mean()
            g = theano.grad(loss_part, w)
            w = w - learning_rate * (g - g_tilde + mu)
            return w

        w_tilde, scan_updates = theano.scan(fn=oneStep, outputs_info=param, n_steps=m)
        updates.update(scan_updates)
        updates[param] = w_tilde[-1]

    return updates
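# Usage pattern (added; not from the original source): the OrderedDict that
# custom_svrg2 returns is meant for theano.function's `updates=` argument.
# A minimal self-contained sketch of that wiring, with plain SGD updates
# standing in for the SVRG ones (which need the caller's objective/getpred):
import numpy as np
import theano
import theano.tensor as T
from collections import OrderedDict

x_ex = T.vector('x_ex')
w_ex = theano.shared(np.zeros(3), name='w_ex')
loss_ex = T.sum((T.dot(x_ex, w_ex) - 1.0) ** 2)
updates_ex = OrderedDict([(w_ex, w_ex - 0.01 * theano.grad(loss_ex, w_ex))])
train_ex = theano.function([x_ex], loss_ex, updates=updates_ex)
print(train_ex(np.ones(3)))  # loss shrinks on repeated calls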
def bsgd1(nn, data, name='sgd', lr=0.022, alpha=0.3, batch_size=500, epochs=10):
    train_set_x, train_set_y = data[0]
    valid_set_x, valid_set_y = data[1]
    test_set_x, test_set_y = data[2]

    # valid_y_numpy = y_numpy[0]
    # test_y_numpy = y_numpy[1]
    # NOTE: the next two calls rely on test_y_numpy/valid_y_numpy, whose
    # assignments above are commented out; restore them before running.
    test_y_numpy = map_48_to_39(test_y_numpy)
    valid_y_numpy = map_48_to_39(valid_y_numpy)
    print test_y_numpy

    num_samples = train_set_x.get_value(borrow=True).shape[0]
    num_batches = num_samples / batch_size

    layers = nn.layers
    x = T.matrix('x')
    y = T.ivector('y')
    y_eval = T.ivector('y_eval')

    cost = nn.cost(x, y)
    accuracy = nn.calcAccuracy(x, y)
    params = nn.params
    delta_params = nn.delta_params
    print theano.pp(cost)
    # theano.pp(accuracy)

    p_grads = [T.grad(cost=cost, wrt=p) for p in params]
    # implementing gradient descent with momentum
    print p_grads

    updates = OrderedDict()
    for dp, gp in zip(delta_params, p_grads):
        updates[dp] = dp * alpha - gp * lr
    for p, dp in zip(params, delta_params):
        updates[p] = p + updates[dp]
    # updates = [(p, p - lr*gp) for p, gp in zip(params, p_grads)]

    index = T.ivector('index')
    batch_sgd_train = theano.function(inputs=[index], outputs=[cost, accuracy], updates=updates,
                                      givens={x: train_set_x[index], y: train_set_y[index]})
    batch_sgd_valid = theano.function(inputs=[],
                                      outputs=[nn.calcAccuracy(x, y), nn.calcAccuracyTimit(x, y)],
                                      givens={x: valid_set_x, y: valid_set_y})
    batch_sgd_test = theano.function(inputs=[], outputs=nn.calcAccuracy(x, y),
                                     givens={x: test_set_x, y: test_set_y})

    indices = np.arange(num_samples, dtype=np.dtype('int32'))
    np.random.shuffle(indices)

    for n in xrange(epochs):
        np.random.shuffle(indices)
        for i in xrange(num_batches):
            batch = indices[i * batch_size: (i + 1) * batch_size]
            batch_sgd_train(batch)
            # y_np = y.get_value()
            # print y.eval()
        print "epoch:", n, " validation accuracy:", batch_sgd_valid()

    print batch_sgd_test()
def compute_gradients(self):
    # maybe doesn't need to be a class variable
    self.grads = T.grad(self.cost, wrt=self.tparams.values())

    # lrate: learning rate
    self.f_populate_gradients, self.f_update_params = self.optimizer()

    # =====================================================================
    # print out the computational graph and make an image of it too
    if self.debug and False:
        # util.colorprint("Following is the graph of the final hidden layer:", "blue")
        # final_activation_fn = theano.function([self.input], final_activation)
        # theano.printing.debugprint(final_activation_fn.maker.fgraph.outputs[0])
        # util.colorprint("Also, saving png of computational graph:", "blue")
        # theano.printing.pydotprint(final_activation_fn,
        #                            outfile="output/lmlp_final_act_viz.png",
        #                            compact=True,
        #                            scan_graphs=True,
        #                            var_with_name_simple=True)
        util.colorprint("Following is the graph of the first of the derivatives:", "blue")
        final_grad_fn = theano.function([self.input, self.y], self.grads[0])
        theano.printing.debugprint(final_grad_fn.maker.fgraph.outputs[0])
        util.colorprint("Yay colorprinted:", "blue")
        print theano.pp(self.final_activation)
        util.colorprint("Also, saving png of computational graph:", "blue")
        theano.printing.pydotprint(final_grad_fn,
                                   outfile="output/lmlp_final_grad_viz.png",
                                   compact=True,
                                   scan_graphs=True,
                                   var_with_name_simple=True)
def derivative():
    x = T.dscalar('x')
    y = x ** 2
    gy = T.grad(y, x)
    print(pp(gy))
    f = function([x], gy)
    print(f(4))
    print(np.allclose(f(94.2), 94.2 * 2))
    print(pp(f.maker.fgraph.outputs[0]))
def test_examples_4(self):
    from theano import pp
    x = T.dscalar('x')
    y = x**2
    gy = T.grad(y, x)
    pp(gy)  # print out the gradient prior to optimization
    # '((fill((x ** 2), 1.0) * 2) * (x ** (2 - 1)))'
    f = function([x], gy)
    assert f(4) == array(8.0)
    assert f(94.2) == array(188.40000000000001)
def gradient(a):
    x = T.dscalar('x')
    y = x**2
    z = 1 / x
    gy = T.grad(y, x)
    gz = T.grad(z, x)
    print(th.pp(gy))
    print(th.pp(gz))
    f = th.function([x], gy)
    g = th.function([x], gz)
    print(f(a))
    print(g(a))
def show(self, verbose=0):
    """ print a summary of current workspace to stdout """
    print 'inferenceArgs', self.ws.inferenceArgs
    print 'inferenceExpr', theano.pp(self.ws.inferenceExpr)
    if verbose >= 1:
        print 'debugprint inferenceExpr:'
        theano.printing.debugprint(self.ws.inferenceExpr)
        if self.ws.dataLossExpr:
            print 'dataLossArgs', self.ws.dataLossArgs
            print 'dataLossExpr', theano.pp(self.ws.dataLossExpr)
            print 'debugprint dataLossExpr:'
            theano.printing.debugprint(self.ws.dataLossExpr)
def scalar():
    x = T.scalar('x')
    y = T.scalar('y')
    z = x + y
    sum = theano.function(inputs=[x, y], outputs=[z])
    print(sum(1, 2))
    print(theano.pp(z))
def ppth(obj, fancy=True, graph=False, fid="/Users/keithd/temp/pydot_graph", fmt="pdf"):
    if graph:
        theano.printing.pydotprint(obj, outfile=fid, format=fmt)
    elif fancy:
        theano.printing.debugprint(obj)
    else:
        return theano.pp(obj)
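# Example use of ppth (added; illustrative only): inspect a gradient
# expression in each of the three modes. The graph=True branch needs pydot
# installed and writes to `fid`.
import theano
import theano.tensor as T

x_dbg = T.dscalar('x_dbg')
g_dbg = T.grad(x_dbg ** 2, x_dbg)
print(ppth(g_dbg, fancy=False))  # pretty-printed string via theano.pp
ppth(g_dbg)                      # fancy=True: debugprint to stdout
# ppth(g_dbg, graph=True)        # writes a pdf of the graph via pydotprint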
def cached_function(inputs, outputs):
    """Find and load a file with a cached tensor function.

    Returns
    -------
    A callable object that will calculate outputs from inputs.
    """
    with Message("Hashing theano fn"):
        if hasattr(outputs, '__len__'):
            hash_content = tuple(map(theano.pp, outputs))
        else:
            hash_content = theano.pp(outputs)
        cache_key = hex(hash(hash_content) & (2**64 - 1))[:-1]
        cache_dir = osp.expanduser('~/.hierctrl_cache')
        cache_file = cache_dir + ("/%s.pkl" % cache_key)
    if osp.isfile(cache_file):
        with Message("unpickling"):
            with open(cache_file, "rb") as f:
                try:
                    return pickle.load(f)
                except Exception:
                    pass
    with Message("compiling"):
        fun = compile_function(inputs, outputs)
    with Message("pickling"):
        with open(cache_file, "wb") as f:
            pickle.dump(fun, f, protocol=pickle.HIGHEST_PROTOCOL)
    return fun
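# Illustration (added; not from the original source) of the cache-key step
# above: the key is just a masked hash of theano.pp's string form of the
# outputs. Note that Python 3 salts str hashes per process, so keys built
# this way are only stable within one interpreter run; the trailing [:-1]
# strips the 'L' suffix that hex() puts on Python 2 longs.
import theano
import theano.tensor as T

v = T.vector('v')
key = hex(hash(theano.pp(v ** 2)) & (2**64 - 1))[:-1]
print(key)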
def ppth(obj, fancy=True, graph=False, fid="/Users/hakon0601/Dropbox/Python/AIProg/AIProg_Module_5/", fmt="pdf"):
    if graph:
        theano.printing.pydotprint(obj, outfile=fid, format=fmt)
    elif fancy:
        theano.printing.debugprint(obj)
    else:
        return theano.pp(obj)
def cached_function(inputs, outputs):
    import theano
    with Message("Hashing theano fn"):
        if hasattr(outputs, "__len__"):
            hash_content = tuple(map(theano.pp, outputs))
        else:
            hash_content = theano.pp(outputs)
        cache_key = hex(hash(hash_content) & (2 ** 64 - 1))[:-1]
        cache_dir = Path("~/.hierctrl_cache")
        cache_dir = cache_dir.expanduser()
        cache_dir.mkdir_p()
        cache_file = cache_dir / ("%s.pkl" % cache_key)
    if cache_file.exists():
        with Message("unpickling"):
            with open(cache_file, "rb") as f:
                try:
                    return pickle.load(f)
                except Exception:
                    pass
    with Message("compiling"):
        fun = compile_function(inputs, outputs)
    with Message("pickling"):
        with open(cache_file, "wb") as f:
            pickle.dump(fun, f, protocol=pickle.HIGHEST_PROTOCOL)
    return fun
def bsgd(nn, data, name='sgd', lr=0.03, epochs=120, batch_size=500, momentum=0):
    train_set_x, train_set_y = data[0]
    valid_set_x, valid_set_y = data[1]
    test_set_x, test_set_y = data[2]
    # exit()

    num_samples = train_set_x.get_value(borrow=True).shape[0]
    num_batches = num_samples / batch_size

    layers = nn.layers
    x = T.matrix('x')
    y = T.ivector('y')

    cost = nn.cost(x, y)
    accuracy = nn.calcAccuracy(x, y)
    params = nn.params
    print theano.pp(cost)

    p_grads = [T.grad(cost=cost, wrt=p) for p in params]
    print p_grads

    updates = [(p, p - lr * gp) for p, gp in zip(nn.params, p_grads)]

    index = T.ivector('index')
    batch_sgd_train = theano.function(inputs=[index], outputs=[cost, accuracy], updates=updates,
                                      givens={x: train_set_x[index], y: train_set_y[index]})
    batch_sgd_valid = theano.function(inputs=[], outputs=nn.calcAccuracy(x, y),
                                      givens={x: valid_set_x, y: valid_set_y})
    batch_sgd_test = theano.function(inputs=[], outputs=nn.calcAccuracy(x, y),
                                     givens={x: test_set_x, y: test_set_y})

    # indices = range(num_samples)
    indices = np.arange(num_samples, dtype=np.dtype('int32'))
    np.random.shuffle(indices)

    for n in xrange(epochs):
        np.random.shuffle(indices)
        for nb in xrange(num_batches):
            batch = indices[nb * batch_size: (nb + 1) * batch_size]
            batch_sgd_train(batch)
        print "Validation Accuracy:", batch_sgd_valid()

    print "Final Test Accuracy:", batch_sgd_test()
def debugVar(v, depth=0, maxdepth=10):
    if depth > maxdepth:
        print '...'
    else:
        print '| ' * (depth + 1),
        print 'var: name', v.name, 'type', type(v), 'def', theano.pp(v)
        for a in v.get_parents():
            debugApply(a, depth=depth + 1, maxdepth=maxdepth)
def __init__(self):
    inSize = BrainBase.inputVectorSize
    h1Size = 64
    self.vIn = vIn = tt.dvector('in')
    self.vM1 = vM1 = tt.dmatrix('m1')
    self.vM2 = vM2 = tt.dvector('m2')
    self.vW1 = vW1 = tt.dvector('w1')
    self.vW2 = vW2 = tt.dscalar('w2')
    vH1 = sigmoid(tt.dot(vIn, vM1) + vW1)
    self.vOut = vOut = sigmoid(tt.dot(vH1, vM2) + vW2)
    t.pp(vOut)
    self.evalFun = t.function([vIn, vW1, vM1, vW2, vM2], vOut)
    self.m1 = 1 / math.sqrt(inSize) * npr.standard_normal((inSize, h1Size))
    self.m2 = 1 / math.sqrt(h1Size) * npr.standard_normal((h1Size,))
    self.w1 = 1 / math.sqrt(inSize) * npr.standard_normal((h1Size,))
    self.w2 = 1 / math.sqrt(h1Size) * npr.standard_normal()
def getp(si, tli, tri, tai, x_tm1, e, l, Wl, Wr, Wv):
    xx = T.concatenate([x_tm1, [self.x0]], axis=0)
    xsi = T.dot(e[si], Wv)
    xsi = xsi[0]
    pl, pl_ = theano.scan(lambda j, Wl, x, l, tli: T.dot(x[tli[j]], Wl[j]) * l[tli[j]],
                          sequences=T.arange(tli.shape[0]),
                          non_sequences=[Wl, xx, l, tli])
    xsi += T.sum(pl, axis=0)[0]
    pr, pr_ = theano.scan(lambda j, Wr, x, l, tri: T.dot(x[tri[j]], Wr[j]) * l[tri[j]],
                          sequences=T.arange(tri.shape[0]),
                          non_sequences=[Wr, xx, l, tri])
    xsi += T.sum(pr, axis=0)[0]
    pa, pa_ = theano.scan(lambda j, x, l, tai: x[tai[j]] * l[tai[j]],
                          sequences=T.arange(tai.shape[0]),
                          non_sequences=[xx, l, tai])
    xsi += T.sum(pa, axis=0)[0]
    xsi /= l[si]
    pp(xsi)
    pp(x_tm1)
    x_t = T.set_subtensor(x_tm1[si], T.tanh(xsi))
    return x_t
def ADAM_OPTIMIZER(loss, all_params, learning_rate=0.001, b1=0.9, b2=0.999, e=1e-8, gamma=1 - 1e-8):
    """
    CITE: http://sebastianruder.com/optimizing-gradient-descent/index.html#adam
    ADAM update rules
    Default values are taken from [Kingma2014]

    References:
    [Kingma2014] Kingma, Diederik, and Jimmy Ba.
    "Adam: A Method for Stochastic Optimization."
    arXiv preprint arXiv:1412.6980 (2014).
    http://arxiv.org/pdf/1412.6980v4.pdf
    """
    updates = []
    all_grads = theano.grad(loss, all_params)
    alpha = learning_rate
    t = theano.shared(np.float32(1).astype(theano.config.floatX))
    # (Decay the first moment running average coefficient)
    b1_t = b1 * gamma ** (t - 1)

    for params_previous, g in zip(all_params, all_grads):
        print(pp(params_previous), params_previous.dtype, params_previous.get_value().shape)
        init_moment = np.zeros(params_previous.get_value().shape, dtype=theano.config.floatX)
        # (the mean)
        first_moment = theano.shared(init_moment)
        # (the uncentered variance)
        second_moment = theano.shared(init_moment)

        # (Update biased first moment estimate)
        bias_m = b1_t * first_moment + (1 - b1_t) * g
        # (Update biased second raw moment estimate)
        bias_v = b2 * second_moment + (1 - b2) * g**2
        # (Compute bias-corrected first moment estimate)
        unbias_m = bias_m / (1 - b1**t)
        # (Compute bias-corrected second raw moment estimate)
        unbias_v = bias_v / (1 - b2**t)
        # (Update parameters)
        update_term = (alpha * unbias_m) / (T.sqrt(unbias_v) + e)
        params_new = params_previous - update_term

        updates.append((first_moment, bias_m))
        updates.append((second_moment, bias_v))
        updates.append((params_previous, params_new))

    # The timestep is advanced once per step, not once per parameter;
    # appending (t, t + 1.) inside the loop would make theano.function
    # reject the duplicate update of `t` when there are multiple params.
    updates.append((t, t + 1.))
    return updates
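# A minimal usage sketch (added; not from the original source): wire the ADAM
# updates into theano.function for a least-squares fit with a single shared
# parameter vector. Assumes the names used above (np, theano, T, pp) are
# already imported in the module.
X_in = T.matrix('X_in')
y_target = T.vector('y_target')
w_adam = theano.shared(np.zeros(3, dtype=theano.config.floatX), name='w_adam')
mse = T.mean((T.dot(X_in, w_adam) - y_target) ** 2)
train_adam = theano.function([X_in, y_target], mse,
                             updates=ADAM_OPTIMIZER(mse, [w_adam]))
X_data = np.random.randn(100, 3).astype(theano.config.floatX)
y_data = np.dot(X_data, [1., -2., 0.5]).astype(theano.config.floatX)
for _ in range(200):
    loss_val = train_adam(X_data, y_data)
print(loss_val)  # approaches 0 as w_adam fits the synthetic targets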
def main():
    tmp_x = np.arange(0, 10, .1)
    tmp_a = np.arange(0.2, 0.5, 0.01)
    tmp_y = np.zeros((len(tmp_x), len(tmp_a)))
    for ida, a in enumerate(tmp_a):
        tmp_y[:, ida] = np.sin(tmp_x * a)
    plot2d(tmp_a, tmp_x, tmp_y)

    x = T.dscalar('x')
    a = T.dscalar('a')
    y = T.sin(x * a)
    ga = T.grad(y, a)
    pp(ga)
    f = theano.function([x, a], ga)

    a_series = np.arange(0.2, 0.5, .01)
    x_series = np.arange(0, 10, .1)
    y_series = np.zeros((len(x_series), len(a_series)))
    for idx, x_ in enumerate(x_series):
        for ida, a_ in enumerate(a_series):
            y_series[idx, ida] = f(x_, a_)
    plot2d(a_series, x_series, y_series)
    plot3d(a_series, x_series, y_series)
def main():
    x = T.dscalar('x')
    y = T.dscalar('y')
    z = x + y
    f = function([x, y], z)

    xm = T.dmatrix('x')
    ym = T.dmatrix('y')
    fm = function([xm, ym], xm * ym)
    print(pp(xm * ym + 4 / ym))
    print(f(2, 3), fm([[1, 2], [3, 4]], [[5, 6], [7, 8]]))

    xv = T.vector()
    yv = T.vector()
    fv = function([xv, yv], xv ** 2 + yv ** 2 + 2 * xv * yv)
    print(fv([1, 2], [3, 4]))
import numpy
from theano import function
from theano import pp
import theano.tensor as T

x = T.dmatrix('x')
s = 1 / (1 + T.exp(-x))
print(pp(s))
logistic = function([x], s)

i = [[0, 1], [-1, -2]]
s2 = (1 + T.tanh(x / 2)) / 2
logistic2 = function([x], s2)
print(logistic(i))
print(numpy.allclose(logistic(i), logistic2(i)))
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import numpy as np
import theano.tensor as T
import theano
from theano import function
from theano import shared

w = T.dvector("w")
w_L2_norm_2 = T.square(w.norm(L=2))
g_w_L2_norm_2 = T.grad(w_L2_norm_2, w)
print "To see the graph, difficult to see the result in math form..."
print theano.pp(g_w_L2_norm_2)
    return g


def theano_jac(u, eps_x=1, eps_y=1):
    J = T.concatenate([theano_grad(u[:, :, 0], eps_x, eps_y)[:, :, :, None],
                       theano_grad(u[:, :, 1], eps_x, eps_y)[:, :, :, None]],
                      axis=3)
    return J.dimshuffle(0, 1, 3, 2)


def np_jac(u):
    J_np = np.empty((u.shape[0], u.shape[1], 2, 2))
    J_np[:, :, 0, :] = np.dstack(np.gradient(u[:, :, 0]))
    J_np[:, :, 1, :] = np.dstack(np.gradient(u[:, :, 1]))
    return J_np


u = T.dtensor3('u')
cost_u_norm = (u**2).sum()
print 'cost_u_norm', pp(cost_u_norm)

#test_u = np.random.rand(2, 3, 2)
#print 'yeah'
#print theano.function([u], T.grad((theano_grad(u[:,:,0])**2).sum(), u))(test_u)

J_u = theano_jac(u)
J_u_func = theano.function([u], J_u)
print pp(u)

test_u = np.random.rand(2, 3, 2)
print 'arr', test_u
def theano_gradient_funtimes():
    import theano
    import numpy as np
    import theano.tensor as T
    import lasagne
    import ibeis_cnn.theano_ext as theano_ext

    TEST = True

    x_data = np.linspace(-10, 10, 100).astype(np.float32)[:, None, None, None]
    y_data = (x_data**2).flatten()[:, None]

    X = T.tensor4('x')
    y = T.matrix('y')

    #x_data_batch =
    #y_data_batch =
    inputs_to_value = {X: x_data[0:16], y: y_data[0:16]}

    l_in = lasagne.layers.InputLayer((16, 1, 1, 1))
    l_out = lasagne.layers.DenseLayer(
        l_in,
        num_units=1,
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.Orthogonal())

    network_output = lasagne.layers.get_output(l_out, X)

    # TEST NETWORK OUTPUT
    if TEST:
        result = theano_ext.eval_symbol(network_output, inputs_to_value)
        print('network_output = %r' % (result,))

    loss_function = lasagne.objectives.squared_error
    #def loss_function(network_output, labels):
    #    return (network_output - labels) ** 2

    losses = loss_function(network_output, y)
    if TEST:
        result = theano_ext.eval_symbol(losses, inputs_to_value)
        print('losses = %r' % (result,))

    loss = lasagne.objectives.aggregate(losses, mode='mean')
    if TEST:
        result = theano_ext.eval_symbol(loss, inputs_to_value)
        print('loss = %r' % (result,))

    L2 = lasagne.regularization.regularize_network_params(l_out, lasagne.regularization.l2)
    weight_decay = .0001
    loss_regularized = loss + weight_decay * L2
    loss_regularized.name = 'loss_regularized'

    parameters = lasagne.layers.get_all_params(l_out)
    gradients_regularized = theano.grad(loss_regularized, parameters, add_names=True)

    if TEST:
        if False:
            s = T.sum(1 / (1 + T.exp(-X)))
            s.name = 's'
            gs = T.grad(s, X, add_names=True)
            theano.pp(gs)
            inputs_to_value = {X: x_data[0:16], y: y_data[0:16]}
            result = theano_ext.eval_symbol(gs, inputs_to_value)
            print('%s = %r' % (gs.name, result,))
            inputs_to_value = {X: x_data[16:32], y: y_data[16:32]}
            result = theano_ext.eval_symbol(gs, inputs_to_value)
            print('%s = %r' % (gs.name, result,))
        for grad in gradients_regularized:
            result = theano_ext.eval_symbol(grad, inputs_to_value)
            print('%s = %r' % (grad.name, result,))

    grad_on_losses = theano.grad(losses, parameters, add_names=True)

    learning_rate_theano = .0001
    momentum = .9
    updates = lasagne.updates.nesterov_momentum(gradients_regularized, parameters,
                                                learning_rate_theano, momentum)

    X_batch = T.tensor4('x_batch')
    y_batch = T.fvector('y_batch')
    func = theano.function(
        inputs=[theano.Param(X_batch), theano.Param(y_batch)],
        outputs=[network_output, losses],
        #updates=updates,
        givens={
            X: X_batch,
            y: y_batch,
        },
    )

    y_predict_batch, loss_batch = func(inputs_to_value[X], inputs_to_value[y])

    if ut.inIPython():
        import IPython
        IPython.get_ipython().magic('pylab qt4')

    import plottool as pt
    pt.plot(x_data, y_predict_batch)  # was `y_predict`, which is undefined
    pt.iup()
    pass
def bsgd_partition(nn, data, name='sgd', lr=0.025, alpha=0.3, batch_size=500, epochs=10):
    # train_set is a list of training sets divided into partitions
    train_set_x, train_set_y = data[0]
    valid_set_x, valid_set_y = data[1]
    test_set_x, test_set_y = data[2]

    num_partitions = len(train_set_x)
    print "number of partitions:", num_partitions
    train_set_x = np.asarray(train_set_x)
    num_samples = train_set_x[0].get_value(borrow=True).shape[0]
    num_batches = num_samples / batch_size

    layers = nn.layers
    x = T.matrix('x')
    y = T.ivector('y')

    cost = nn.cost(x, y)
    accuracy = nn.calcAccuracy(x, y)
    params = nn.params
    delta_params = nn.delta_params
    print theano.pp(cost)
    # theano.pp(accuracy)

    p_grads = [T.grad(cost=cost, wrt=p) for p in params]
    # implementing gradient descent with momentum
    print p_grads

    updates = OrderedDict()
    for dp, gp in zip(delta_params, p_grads):
        updates[dp] = dp * alpha - gp * lr
    for p, dp in zip(params, delta_params):
        updates[p] = p + updates[dp]
    # updates = [(p, p - lr*gp) for p, gp in zip(params, p_grads)]

    index = T.ivector('index')
    ii = T.ivector('ii')
    y_eval = T.ivector('y_eval')
    batch_sgd_train = theano.function(inputs=[ii, index], outputs=[cost, accuracy], updates=updates,
                                      givens={x: train_set_x[index], y: train_set_y[index]})
    batch_sgd_valid = theano.function(inputs=[], outputs=nn.calcAccuracy(x, y),
                                      givens={x: valid_set_x, y: valid_set_y})
    batch_sgd_test = theano.function(inputs=[], outputs=nn.calcAccuracy(x, y),
                                     givens={x: test_set_x, y: test_set_y})

    indices = np.arange(num_samples, dtype=np.dtype('int32'))
    np.random.shuffle(indices)

    for n in xrange(epochs):
        np.random.shuffle(indices)
        sup_indices = random.randrange(0, num_partitions)
        sup_indices = np.arange(num_partitions, dtype=np.dtype('int32'))
        for j in xrange(num_partitions):
            sup_index = sup_indices[j]
            for i in xrange(num_batches):
                # batch = [sup_index]
                batch = indices[i * batch_size: (i + 1) * batch_size]
                # batch_sgd_train declares two inputs (ii, index), so pass
                # them as separate arguments rather than one list
                batch_sgd_train([sup_index], batch)
        print "validation accuracy:", batch_sgd_valid()

    print batch_sgd_test()
def getUpdateParams(self):
    update = []
    aux = []

    # Update state
    update.append((self.params[0], input_layer.output))

    # Update output
    print 'Length: ' + str(len(self.connections))
    for i, c in enumerate(self.connections):
        aux.append(sparse.structured_dot(
            sparse.transpose(c.input),
            self.params[2][i] * c.inhibition))
    aux2 = aux.pop()
    for a in range(len(aux)):
        aux2 = sparse.add(aux2, aux.pop())
    print aux2
    from theano import pp
    print 'out: '
    print pp(aux2)
    update.append((self.params[1], sparse.transpose(sparse.structured_sigmoid(aux2))))  # Hardcoded!!
    '''update.append((self.params[1],
        sparse.transpose(
            sparse.structured_sigmoid(sparse.structured_dot(
                sparse.transpose(self.connections[0].input),
                self.params[2][0])))))
    '''
    '''
    update.append((self.params[1],
        sparse.transpose(
            sparse.structured_sigmoid(
                sparse.structured_dot(
                    sparse.transpose(self.connections[0].input),  # Input
                    self.params[2][0])))))  # Weights
    '''
    # Update weights
    '''
    # Old ones (OJA)
    for i, w in enumerate(self.params[2]):
        update.append(
            (w,
             # layer.params[0]))
             sparse.add(
                 w,
                 self.LR[i] * sparse.transpose(
                     sparse.structured_dot(self.params[1], self.x_yw[i])))))
    '''
    for i, w in enumerate(self.params[2]):
        update.append(
            (w,
             # w))
             # layer.params[0]))
             sparse.structured_maximum(
                 sparse.add(
                     w,
                     sparse.add(self.xy[i], self.AWW[i])), 0)))
    return update
print("hello") import numpy as np import matplotlib.pyplot as plt import theano import theano.tensor as T foo = T.scalar('foo') bar = foo**2 print(type(bar)) print(bar.type) print(theano.pp(bar)) f = theano.function( [foo], bar) #the first argument of theano.function define the input of function print(f(5)) print(bar.eval({foo: 3})) def square(x): return x**2 bar = square(foo) print(bar.eval({foo: 3})) #theano.tensor A = T.matrix('A') x = T.vector('x') b = T.vector('b')
    inputs=[x, y],
    outputs=[prediction, xent],
    updates=((w, w - 0.1 * gw), (b, b - 0.1 * gb)))
predict = theano.function(inputs=[x], outputs=prediction)

# Train
for i in range(training_steps):
    pred, err = train(D[0], D[1])

print "Final model:"
print w.get_value(), b.get_value()
print "target values for D:", D[1]
print "prediction on D:", predict(D[0])

from theano import pp
x = T.dscalar('x')
y = x ** 2
gy = T.grad(y, x)
pp(gy)  # print out the gradient prior to optimization
f = function([x], gy)
f(4)
f(94.2)

x = T.dmatrix('x')
s = T.sum(1 / (1 + T.exp(-x)))
gs = T.grad(s, x)
dlogistic = function([x], gs)
dlogistic([[0, 1], [-1, -2]])
def grad(fn, args):
    gy = T.grad(fn, args)
    print pp(gy)
    return gy
# >>> x.type # TensorType(float64, scalar) # >>> T.dscalar # TensorType(float64, scalar) # >>> x.type is T.dscalar # True #By calling T.dscalar with a string argument, you create a Variable representing a #floating-point scalar quantity with the given name. x = T.dmatrix('x') y = T.dmatrix('y') z = x + y f = function([x, y], z) print f([[1, 2], [3, 4]], [[10, 20], [30, 40]]) print pp(z) a = T.vector() # declare variable out = a + a ** 10 # build symbolic expression f = function([a], out) # compile function print(f([0, 1, 2])) x = T.dmatrix('x') s = 1 / (1 + T.exp(-x)) logistic = function([x], s) print logistic([[0, 1], [-1, -2]]) s2 = (1 + T.tanh(x / 2)) / 2 logistic2 = function([x], s2) print logistic2([[0, 1], [-1, -2]])
import numpy
import theano
import theano.tensor as T
from theano import pp

x = T.scalar("x")
y = x**2
gy = T.grad(y, x)
pp(gy)
f = theano.function([x], gy)
print 'f(4) :' + str(f(4))
"""
Theano tutorial from deeplearning.net

Created on Wed Oct 14 16:47:05 2015

@author: drosen
"""
import theano.tensor as T
from theano import function

x = T.dscalar('x')
y = T.dscalar('y')
z = x + y
f = function([x, y], z)

from theano import pp
print pp(z)

z.eval({x: 6.4, y: 12.1})

## Adding two matrices
x = T.dmatrix('x')
y = T.dmatrix('y')
z = x + y
f = function([x, y], z)
f([[1, 2], [3, 4]], [[10, 20], [30, 40]])  # output is a numpy array

import numpy
f(numpy.array([[1, 2], [3, 4]]), [[10, 20], [30, 40]])  # output is a numpy array

## Exercise
# In[ ]:

# Create the scalars
x = T.scalar()
y = T.scalar()


# In[ ]:

print "Add two numbers"
temp1 = x + y  # So this is how you add two "Symbolic variables"
addTh = theano.function([x, y], temp1)
theano.pp(addTh.maker.fgraph.outputs[0])


# In[ ]:

print addTh(1, 2)


# In[ ]:

print "Comparing two numbers"
temp1 = T.le(x, y)
compTh = theano.function([x, y], temp1)
theano.pp(compTh.maker.fgraph.outputs[0])
def test_subtensor():
    x = theano.tensor.dvector()
    y = x[1]
    assert theano.pp(y) == "<TensorType(float64, vector)>[Constant{1}]"
#!/usr/bin/env python
# coding=utf-8
##############################################################
# File Name : Adding_two_Scalars.py
# Purpose : Basic scalar operation
# Creation Date : Sat 15 Apr 2017 08:35:34 PM CST
# Last Modified : Tue 18 Apr 2017 01:37:55 PM CST
# Created By : SL Chung
##############################################################
import numpy as np
import theano.tensor as T
from theano import function
from theano import pp

# Variable object
# T.dscalar is the type we assign to
# 0-dimensional arrays (scalar) of doubles (d)
# with the given name (string)
x = T.dscalar('x')
y = T.dscalar('y')
z = x + y

# show the function f, how z is calculated
print('z = ', pp(z))

# The output of the function f is a numpy.ndarray with zero dimensions.
f = function([x, y], z)
print('f(x,y) = x + y')
print('f(2,3) is', f(2, 3))
#!/usr/bin/python
# -*- coding: utf-8 -*-
import numpy
import theano
import theano.tensor as T
from theano import pp

x = T.dscalar('x')
y = x**2
gy = T.grad(y, x)
print(pp(gy))  # print out the gradient prior to optimization
f = theano.function([x], gy)
print(f(4))
print(numpy.array(8.0))
print(numpy.allclose(f(94.2), 188.4))
# Youtube video tutorial: https://www.youtube.com/channel/UCdyjiB5H8Pu7aDTNVXTTpcg
# Youku video tutorial: http://i.youku.com/pythontutorial

# 4 - basic usage

"""
Please note, this code is only for python 3+. If you are using python 2+, please modify the code accordingly.
"""
from __future__ import print_function
import numpy as np
import theano.tensor as T
from theano import function

# basic
x = T.dscalar('x')  # declare a theano variable
y = T.dscalar('y')

z = x + y  # define the actual function in here
f = function([x, y], z)  # the inputs are in [], and the output in the "z"

print(f(2, 3))  # only give the inputs "x and y" for this function, then it will calculate the output "z"

# to pretty-print the function (inspect its definition)
from theano import pp
print(pp(z))  # z = x+y

# how about matrix
x = T.dmatrix('x')  # declare theano matrices; dmatrix is float64, fmatrix is float32
y = T.dmatrix('y')
z = x + y  # or z = T.dot(x, y)
f = function([x, y], z)
print(f(np.arange(12).reshape((3, 4)), 10 * np.ones((3, 4))))
from theano import pp
from theano import In
from theano import shared

x = numpy.asarray([[1, 2], [3, 4], [5, 6]])
x.shape

x = T.dscalar()
y = T.dscalar()
w = T.dscalar()
z = (x + y) * w
g = 10
f = function([x, In(y, value=1), In(w, value=2, name='w_by_name')], z)
f(2, 3, w_by_name=g)
numpy.allclose(f(16.3, 12.1), 28.4)
print(pp(z))

a = T.vector()
b = T.vector()
target = a ** 2 + b ** 2 + 2 * a * b
f1 = function([a, b], target)
print(f1([1, 2], [4, 5]))

x = T.dmatrix()
s = 1 / (1 + T.exp(-x))
logistic = function([x], s)
m = [[1, 2], [3, 4], [5, 6]]
logistic(m)

s2 = (1 + T.tanh(x / 2)) / 2
logistic2 = function([x], s2)
import numpy
import theano
import theano.tensor as T
from theano import pp

x = T.dscalar('x')
y = x ** 2
gy = T.grad(y, x)
print pp(gy)  # print out the gradient prior to optimization
# '((fill((x ** TensorConstant{2}), TensorConstant{1.0}) * TensorConstant{2}) * (x ** (TensorConstant{2} - TensorConstant{1})))'
f = theano.function([x], gy)
print f(4)
print numpy.allclose(f(94.2), 188.4)
print pp(f.maker.fgraph.outputs[0])
# coding: utf-8
import numpy
import theano
import theano.tensor as T

x = T.dscalar('x')
y = (T.sqrt(x) + 1) ** 3
dy = T.grad(cost=y, wrt=x)

f = theano.function(inputs=[x], outputs=dy)
print theano.pp(f.maker.fgraph.outputs[0])
print f(2)
print f(3)
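# Sanity check (added; not in the original): for y = (sqrt(x) + 1)^3 the
# closed-form derivative is dy/dx = 3 * (sqrt(x) + 1)^2 / (2 * sqrt(x)),
# which should match the compiled gradient above.
def dy_by_hand(v):
    return 3 * (numpy.sqrt(v) + 1) ** 2 / (2 * numpy.sqrt(v))

print numpy.allclose(f(2), dy_by_hand(2))
print numpy.allclose(f(3), dy_by_hand(3))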
# Specify the cost function and the variable to differentiate with respect to
x = T.dscalar('x')  # define the symbolic expression to be differentiated
y = x**2

# Differentiate y with respect to x
# y' = 2x
# gy is the symbolic expression of the derivative
gy = T.grad(cost=y, wrt=x)

# Define a function that evaluates the derivative from the symbols
f = theano.function(inputs=[x], outputs=gy)

# Display the derivative expression with theano.pp()
print(theano.pp(f.maker.fgraph.outputs[0]))

# Evaluate the derivative at concrete values of x
print(f(2))
print(f(3))
print(f(4))

# y = e^x
x = T.dscalar('x')  # define the symbolic expression to be differentiated
y = T.exp(x)

# Differentiate y with respect to x
gy2 = T.grad(cost=y, wrt=x)
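# Natural completion of the e^x example (added; not in the original source):
# since d/dx e^x = e^x, the compiled gradient just reproduces exp.
f2 = theano.function(inputs=[x], outputs=gy2)
print(f2(1))  # ~2.71828
print(f2(2))  # ~7.38906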
def bsgd(nn, data, name='sgd', lr=0.06, alpha=0.3, batch_size=300, epochs=20, percent_data=1.):
    train_set_x, train_set_y = data[0]
    valid_set_x, valid_set_y = data[1]
    test_set_x, test_set_y = data[2]

    num_samples = train_set_x.get_value(borrow=True).shape[0]
    num_batches = int((num_samples / batch_size) * percent_data)

    layers = nn.layers
    x = T.matrix('x')
    y = T.ivector('y')
    y_eval = T.ivector('y_eval')

    cost = nn.cost(x, y)
    accuracy = nn.calcAccuracy(x, y)
    accuracy_phonemes = nn.calcAccuracyTimit(x, y)
    params = nn.params
    delta_params = nn.delta_params
    print theano.pp(cost)

    LR = Learning_Rate_Linear_Decay(start_rate=lr)
    # theano.pp(accuracy)

    index = T.ivector('index')
    learning_rate = T.scalar('learning_rate')
    momentum = T.scalar('momentum')

    p_grads = [T.grad(cost=cost, wrt=p) for p in params]
    alpha = 0.2
    lr = 0.02
    # implementing gradient descent with momentum
    print p_grads

    updates = OrderedDict()
    for dp, gp in zip(delta_params, p_grads):
        updates[dp] = dp * momentum - gp * learning_rate
    for p, dp in zip(params, delta_params):
        updates[p] = p + updates[dp]
    # updates = [(p, p - lr*gp) for p, gp in zip(params, p_grads)]

    batch_sgd_train = theano.function(inputs=[index, theano.Param(learning_rate, default=0.045),
                                              theano.Param(momentum, default=0.3)],
                                      outputs=[cost, accuracy, accuracy_phonemes], updates=updates,
                                      givens={x: train_set_x[index], y: train_set_y[index]})
    batch_sgd_valid = theano.function(inputs=[], outputs=[nn.calcAccuracy(x, y)],
                                      givens={x: valid_set_x, y: valid_set_y})
    batch_sgd_test = theano.function(inputs=[], outputs=[nn.calcAccuracy(x, y)],
                                     givens={x: test_set_x, y: test_set_y})

    indices = np.arange(num_samples, dtype=np.dtype('int32'))
    np.random.shuffle(indices)
    train_error_epochs = []

    # this function takes a list as input and computes a new list which is basically a diff list.
    def get_diff_list(li):
        li = [0] + li
        lidiff = []
        for i in xrange(len(li) - 1):
            lidiff.append(abs(li[i + 1] - li[i]))
        return lidiff

    ofile = open('train_log.csv', "wb")
    train_log_w = csv.writer(ofile, delimiter=' ')

    for n in xrange(epochs):
        np.random.shuffle(indices)
        train_accuracy = []
        for i in xrange(num_batches):
            batch = indices[i * batch_size: (i + 1) * batch_size]
            c, a1, a2 = batch_sgd_train(index=batch, learning_rate=LR.getRate(), momentum=alpha)
            train_accuracy.append(a1)

        print LR.getRate()
        if LR.getRate() == 0:
            break

        wt = nn.get_weight()
        # print np.mean(wt[0].flatten()), np.mean(wt[1].flatten()), np.mean(wt[2].flatten())

        valid_accuracy = batch_sgd_valid()
        log_n = ["epoch:", str(n), "train_accuracy:", str(np.mean(a1)),
                 " train_accuracy_phonemes:", str(np.mean(a2)),
                 " validation_accuracy:", str(valid_accuracy[0])]
        train_log_w.writerow(log_n)
        print "epoch:", str(n), "train_accuracy:", str(np.mean(a1)), " train_accuracy_phonemes:", str(np.mean(a2)), " validation_accuracy:", str(valid_accuracy[0])
        # print "epoch:", n, " train accuracy", np.mean(a1)

        train_error_current = 1.0 - np.mean(a1)
        train_error_epochs.append(np.mean(a1))
        LR.updateError(error=(1.0 - valid_accuracy[0]) * 100.0)
        LR.updateRate()

    test_accuracy = batch_sgd_test()
    print test_accuracy[0]
def f(prior_result, A):
    print("prior_result = %s" % pp(prior_result))
    return prior_result * A
def _step(h_, c_, *y_):
    # h_: previous hidden state, c_: previous cell state, *y_: raw inputs
    print "h_.ndim: ", h_.ndim
    print "c_.ndim: ", c_.ndim
    print "len(y_): ", len(y_)
    print "y_[0].shape: ", y_[0].shape  # y_[0] has size of n_samples x output_dim (output_dim = 1 for sinewave example)
    print "y_[0].ndim: ", y_[0].ndim

    # build x_ from y_
    x_ = y_[0]
    for y_tmp in y_[1:]:
        x_ = tensor.concatenate([x_, y_tmp], axis=1)
    print theano.pp(x_)

    h_printed_ = theano.printing.Print('h_ in step')(h_)
    c_printed_ = theano.printing.Print('c_ in step')(c_)
    x_printed_ = theano.printing.Print('x_ in step')(x_)
    print "x_.ndim: ", x_.ndim

    # embedding and activation
    #emb_ = tensor.dot(x_, tparams['Wemb'])
    emb_ = tensor.dot(x_printed_, tparams['Wemb'])
    print "emb_.ndim: ", emb_.ndim
    emb_printed_ = theano.printing.Print('emb_ in step')(emb_)

    #state_below_ = (tensor.dot(emb_, tparams[_p(prefix, 'W')]) + tparams[_p(prefix, 'b')])
    state_below_ = (tensor.dot(emb_printed_, tparams[_p(prefix, 'W')]) + tparams[_p(prefix, 'b')])
    print "state_below_.ndim: ", state_below_.ndim
    print "tparams[lstm_W].ndim: ", tparams['lstm_W'].ndim
    state_below_printed_ = theano.printing.Print('state_below_ in step')(state_below_)

    #preact = tensor.dot(h_, tparams[_p(prefix, 'U')])
    preact = tensor.dot(h_printed_, tparams[_p(prefix, 'U')])
    #preact += state_below_  # x_
    preact += state_below_printed_  # x_
    preact += tparams[_p(prefix, 'b')]
    print "preact.ndim: ", preact.ndim

    i = tensor.nnet.sigmoid(_slice(preact, 0, options['dim_proj']))
    f = tensor.nnet.sigmoid(_slice(preact, 1, options['dim_proj']))
    o = tensor.nnet.sigmoid(_slice(preact, 2, options['dim_proj']))
    c = tensor.tanh(_slice(preact, 3, options['dim_proj']))

    #c = f * c_ + i * c
    c = f * c_printed_ + i * c
    #c = m_[:, None] * c + (1. - m_)[:, None] * c_  # if data is valid m_ = 1, else m_ = 0 (ignore the data)
    c_printed = theano.printing.Print('c in step')(c)
    c = c_printed

    h = o * tensor.tanh(c)
    #h = m_[:, None] * h + (1. - m_)[:, None] * h_
    h_printed = theano.printing.Print('h in step')(h)
    h = h_printed

    if options['use_dropout']:
        #h = dropout_layer(h, use_noise, trng)
        proj = h * 0.5
    else:
        proj = h
    proj_printed = theano.printing.Print('proj in step')(proj)

    #y = tensor.dot(h, tparams['U']) + tparams['b']
    # tparams['U'] has size of dim_proj x output_dim (128 x 1 for sinewave example)
    y = tensor.dot(proj_printed, tparams['U']) + tparams['b']
    y_printed = theano.printing.Print('y in step')(y)
    y = y_printed

    print "h.ndim: ", h.ndim
    print "c.ndim: ", c.ndim
    print "y.ndim: ", y.ndim
    return h, c, y
# For this reason, applying the chain rule from the output backwards yields the
# derivative of an expression with respect to its input variables as an expression itself.
# Consequently, derivatives of undefined or implicit functions are impossible in principle;
# only expressions built explicitly from Theano operations can be differentiated.
# This is where Theano differs from computer algebra systems such as Maxima.

print("Define a double-precision scalar x and its square y")
x = T.dscalar('x')
y = x ** 2

print("gy is the derivative of y with respect to x")
gy = T.grad(y, x)

print("Display gy before compilation and optimization")
print("pp(gy) = %s\n" % pp(gy))
# '((fill((x ** 2), 1.0) * 2) * (x ** (2 - 1)))'

print("fill(x ** 2, 1.0) is a tensor of the same shape as x**2 (here a scalar) with all components 1.0,")
print("so the whole expression is 1 * 2 * (x ** (2 - 1)), i.e. 2*x.")

print("f is gy compiled and optimized. The debugprint shows it has become 2*x.")
f = function([x], gy)
print(debugprint(f))

print("Pretty-printing f's maker.fgraph.outputs[0] property shows the same thing.")
print("pp(f.maker.fgraph.outputs[0]) = %s" % pp(f.maker.fgraph.outputs[0]))

print("f(4) = %f" % f(4))  # array(8.0)
# 4 - basic usage

"""
Please note, this code is only for python 3+.
If you are using python 2+, please modify the code accordingly.
"""
from __future__ import print_function
import numpy as np
import theano.tensor as T
from theano import function

# basic
x = T.dscalar('x')
y = T.dscalar('y')

z = x + y  # define the actual function in here
f = function([x, y], z)  # the inputs are in [], and the output in the "z"

print(f(2, 3))  # only give the inputs "x and y" for this function, then it will calculate the output "z"

# to pretty-print the function
from theano import pp
print(pp(z))

# how about matrix
x = T.dmatrix('x')
y = T.dmatrix('y')
z = x + y
f = function([x, y], z)
print(f(np.arange(12).reshape((3, 4)), 10 * np.ones((3, 4))))
# Adding two scalars
import numpy
import theano.tensor as K
from theano import function

a = K.dscalar('a')
b = K.dscalar('b')
c = a + b
f = function([a, b], c)
f(30, 10)
f(-1, 4)
numpy.allclose(f(10, 10), 20)
numpy.allclose(f(10, 10), 40)
type(a)
a.type
K.dscalar

from theano import pp
print(pp(c))
numpy.allclose(c.eval({a: 1, b: 2}), 3)
numpy.allclose(c.eval({a: 1, b: 30}), 3)
numpy.allclose(c.eval({a: 1, b: 30}), 31)
        ], axis=3)
    return J.dimshuffle(0, 1, 3, 2)


def np_jac(u):
    J_np = np.empty((u.shape[0], u.shape[1], 2, 2))
    J_np[:, :, 0, :] = np.dstack(np.gradient(u[:, :, 0]))
    J_np[:, :, 1, :] = np.dstack(np.gradient(u[:, :, 1]))
    return J_np


u = T.dtensor3('u')
cost_u_norm = (u**2).sum()
print 'cost_u_norm', pp(cost_u_norm)

#test_u = np.random.rand(2, 3, 2)
#print 'yeah'
#print theano.function([u], T.grad((theano_grad(u[:,:,0])**2).sum(), u))(test_u)

J_u = theano_jac(u)
J_u_func = theano.function([u], J_u)
print pp(u)

test_u = np.random.rand(2, 3, 2)
print 'arr', test_u

J_theano = J_u_func(test_u)
print 'grad', J_theano

J_np = np_jac(test_u)
#print 'np grad', J_np
import theano as th
import theano.tensor as T

a = T.scalar('a')
b = T.scalar('b')

# c is the symbolic representation of 'a * b'
c = a * b
print(th.pp(c))

# cf is the compiled version of c ...
print('.')
cf = th.function([a, b], c)
print('.')
print(cf)

# ... which we can call like any other function
print(cf(3, 8))

# We can also make the function change the state of some external variable,
# called a _shared_ variable
i = th.shared(value=0.0, name='i')
cf = th.function([a, b], c, updates=[(i, i + 1)])
print('cf has been called {} times'.format(i.get_value()))
print(cf(3, 8))
print(cf(3, 8))
print(cf(3, 8))
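# Follow-up (added; not in the original): every call applies the update pair
# (i, i + 1), so after the three calls above the counter reads 3.0.
print('cf has been called {} times'.format(i.get_value()))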
#!/usr/bin/python3
# http://deeplearning.net/software/theano/tutorial/gradients.html
import numpy as np
import theano as tn

x = tn.tensor.dscalar('x')
y = x**2
gy = tn.tensor.grad(y, x)

print('dump gradient function')
print(tn.pp(gy))
print('--> fill((x ** 2), 1.0) means ones(size(x**2))')

f = tn.function([x], gy)
print(tn.pp(f.maker.fgraph.outputs[0]))
print(f(4))
print(np.allclose(f(94.2), 188.4))

'''
derivative of logistic?
'''
x = tn.tensor.dscalar('x')
s = 1 / (1 + tn.tensor.exp(-x))
gs = tn.tensor.grad(s, x)
ds = tn.function([x], gs)
print(tn.pp(ds.maker.fgraph.outputs[0]))

'''
my test
'''
x = tn.tensor.dscalar('x')
# with, let us make a simple function: add two numbers together. Here is
# how you do it:
import numpy
import theano.tensor as T
from theano import function

x = T.dscalar('x')
y = T.dscalar('y')
z = x + y
print type(x)
print type(y)
print type(z)

# z is yet another Variable which represents the addition of x and y. You
# can use the pp() function to pretty-print out the computation associated
# to z.
from theano import pp
print pp(z)

f = function([x, y], z)
print type(f)

# As a shortcut, you can skip the step f = function([x, y], z) and just use
# a variable's eval() method. The eval() method is not as flexible as
# function(), but it can do everything we have seen in the tutorial so far.
# It has the added benefit of not requiring you to import function(). Here
# is how eval() works:
print numpy.allclose(z.eval({x: 16.3, y: 12.1}), 28.4)

# We passed eval() a dictionary mapping symbolic variables to the values to
# substitute for them, and it returned the numerical value of the expression.

# And now that we have created our function, we can use it:
print f(2, 3)
print numpy.allclose(f(16.3, 12.1), 28.4)
# Inspired by the tutorial available at:
# https://www.analyticsvidhya.com/blog/2016/04/neural-networks-python-theano/
import numpy as np
import theano.tensor as T
from theano import function
from theano import shared
from theano import pp  # pretty-print

# multiple outputs
a = T.dscalar('a')
f = function([a], [a ** 2, a ** 3])
print(f(3))

# computing gradients
x = T.dscalar('x')
y = x ** 3
qy = T.grad(y, x)  # qy = 3x^2
f = function([x], qy)
g = function([x], y)
print(f(2))
print(g(3))
print(pp(qy))