def test_optimize_xent_vector3(self):
    # Same as test_optimize_xent_vector2, but y is the result of
    # a "flatten", and it used to make the constant-folding
    # of arange(y.shape[0]) happen before the xent optimization
    verbose = 0
    mode = theano.compile.mode.get_default_mode()
    if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
        mode = 'FAST_RUN'
    rng = numpy.random.RandomState(utt.fetch_seed())
    x_val = rng.randn(5).astype(config.floatX)
    b_val = rng.randn(5).astype(config.floatX)
    y_val = numpy.asarray([2])
    x = T.vector('x')
    b = T.vector('b')
    y_ = T.lvector('y_')
    y = y_.flatten()

    ## Test that a biased softmax is optimized correctly
    bias_expressions = [
        T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
        -T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
        -T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
        T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]

    for expr in bias_expressions:
        f = theano.function([x, b, y_], expr, mode=mode)
        if verbose:
            printing.debugprint(f)
        try:
            ops = [node.op for node in f.maker.fgraph.toposort()]
            # [big_op, sum, dim_shuffle, flatten]
            assert len(ops) <= 4
            assert crossentropy_softmax_argmax_1hot_with_bias in ops
            assert not [1 for o in ops
                        if isinstance(o, T.AdvancedSubtensor)]
            f(x_val, b_val, y_val)
        except Exception:
            theano.printing.debugprint(f)
            raise

        backup = config.warn.sum_div_dimshuffle_bug
        config.warn.sum_div_dimshuffle_bug = False
        try:
            # Build the gradient function from the true input y_, not the
            # derived variable y: theano.function inputs cannot have owners.
            g = theano.function([x, b, y_], T.grad(expr, x), mode=mode)
        finally:
            config.warn.sum_div_dimshuffle_bug = backup
        if verbose:
            printing.debugprint(g)
        try:
            ops = [node.op for node in g.maker.fgraph.toposort()]
            assert len(ops) <= 6
            assert crossentropy_softmax_1hot_with_bias_dx in ops
            assert softmax_with_bias in ops
            assert softmax_grad not in ops
            g(x_val, b_val, y_val)
        except Exception:
            theano.printing.debugprint(g)
            raise
def __init__(self, name, data, model, optimizer, cost, outputs,
             debug_print=0, trainlog=None, extension=None):
    self.name = name
    self.data = data
    self.model = model
    self.optimizer = optimizer
    self.inputs = model.inputs
    self.params = model.params
    self.cost = cost
    self.outputs = tolist(outputs)
    self.updates = model.updates
    self.extension = extension
    self.debug_print = debug_print
    t0 = time.time()
    self.cost_fn = self.build_training_graph()
    print "Elapsed compilation time: %f" % (time.time() - t0)
    if self.debug_print:
        from theano.printing import debugprint
        debugprint(self.cost_fn)
    if trainlog is None:
        self.trainlog = TrainLog()
    else:
        self.trainlog = trainlog
    self.endloop = 0
def test_debug_print():
    from theano.printing import debugprint
    x = T.dscalar('x')
    y = x ** 2
    gy = T.grad(y, x)
    debugprint(gy)  # print out the gradient prior to optimization
    f = theano.function([x], gy)
    debugprint(f.maker.fgraph.outputs[0])  # print the optimized graph
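# A hedged addition, not part of the test above: besides printing to stdout,
# theano.printing.debugprint can return the formatted graph as a string by
# passing file='str' (the same mode the logging example further below uses),
# which is convenient for log files or assertions. A minimal sketch:
import theano
import theano.tensor as T
from theano.printing import debugprint

x = T.dscalar('x')
f = theano.function([x], T.grad(x ** 2, x))
# Capture the optimized graph as a string instead of printing it
graph_str = debugprint(f.maker.fgraph.outputs[0], file='str')
print(graph_str)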
def test_optimize_xent_vector2(self):
    verbose = 0
    mode = theano.compile.mode.get_default_mode()
    if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
        mode = 'FAST_RUN'
    rng = numpy.random.RandomState(utt.fetch_seed())
    x_val = rng.randn(5).astype(config.floatX)
    b_val = rng.randn(5).astype(config.floatX)
    y_val = numpy.asarray([2])
    x = T.vector('x')
    b = T.vector('b')
    y = T.lvector('y')

    ## Test that a biased softmax is optimized correctly
    bias_expressions = [
        T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
        -T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
        -T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
        T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]

    for expr in bias_expressions:
        f = theano.function([x, b, y], expr, mode=mode)
        if verbose:
            printing.debugprint(f)
        try:
            ops = [node.op for node in f.maker.fgraph.toposort()]
            # [big_op, sum, dim_shuffle]
            assert len(ops) == 3
            assert crossentropy_softmax_argmax_1hot_with_bias in ops
            assert not [1 for o in ops
                        if isinstance(o, T.AdvancedSubtensor)]
            f(x_val, b_val, y_val)
        except Exception:
            theano.printing.debugprint(f)
            raise

        backup = config.warn.sum_div_dimshuffle_bug
        config.warn.sum_div_dimshuffle_bug = False
        try:
            g = theano.function([x, b, y], T.grad(expr, x), mode=mode)
        finally:
            config.warn.sum_div_dimshuffle_bug = backup
        if verbose:
            printing.debugprint(g)
        try:
            ops = [node.op for node in g.maker.fgraph.toposort()]
            assert len(ops) <= 6
            assert crossentropy_softmax_1hot_with_bias_dx in ops
            assert softmax_with_bias in ops
            assert softmax_grad not in ops
            g(x_val, b_val, y_val)
        except Exception:
            theano.printing.debugprint(g)
            raise
def _test_TimeDistributedDense():
    print '\n------------------------------------------------------------'
    print 'Test: Time Distributed Dense Unit'
    x = U.Input(3, name='X')
    tdd = U.TimeDistributedDense(16, 1024, 128)
    tdd.set_input('input', x, 'output')
    x.build()
    tdd.check()
    tdd.build()
    print P.debugprint(tdd.get_output('output')(train=False))
def _test_TimeDistributedDense():
    print '\n------------------------------------------------------------'
    print 'Test: Time Distributed Dense Layer'
    x = L.Input(3, name='X')
    tdd = L.TimeDistributedDense(16, 1024, 128)
    tdd.set_function('activation', activation_linear)
    tdd.set_function('init', init_glorot_uniform)
    tdd.set_input('input', x, 'output')
    x.build()
    tdd.build()
    print P.debugprint(tdd.get_output('output'))
def detect_nan(fgraph, i, node, fn):
    from theano.printing import debugprint
    for output in fn.outputs:
        if (not isinstance(output[0], np.random.RandomState) and
                np.isnan(output[0]).any()):
            print("*** NaN detected ***")
            debugprint(node)
            print("Inputs : %s" % [input[0] for input in fn.inputs])
            print("Outputs: %s" % [output[0] for output in fn.outputs])
            break
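# A hedged usage sketch for detect_nan above: Theano's documented pattern is
# to install such a hook via theano.compile.MonitorMode as a post-execution
# function, so every Apply node is inspected right after it runs. The
# expression and input value below are illustrative.
import theano
import theano.tensor as T

x = T.dscalar('x')
f = theano.function([x], T.log(x),
                    mode=theano.compile.MonitorMode(post_func=detect_nan))
f(-1.0)  # log of a negative number yields NaN and triggers the hook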
def test_basic(self):
    c = T.matrix()
    p_y = T.exp(c) / T.exp(c).sum(axis=1).dimshuffle(0, 'x')

    # test that function contains softmax and no div.
    f = theano.function([c], p_y, mode=self.mode)
    f_ops = [n.op for n in f.maker.env.toposort()]
    print '--- f ='
    printing.debugprint(f)
    print '==='
    assert len(f_ops) == 1
    assert softmax in f_ops
    f(self.rng.rand(3, 4).astype(config.floatX))
def __init__(self, name, data, model, optimizer, cost, outputs, n_steps,
             debug_print=0, trainlog=None, extension=None,
             lr_iterations=None, decay_schedule=2,
             k_speedOfconvergence=40):
    # Which attributes should be pickled?
    self.name = name                        # yes
    self.data = data                        # no
    self.model = model                      # yes
    self.optimizer = optimizer              # no
    self.inputs = model.inputs              # no
    self.cost = cost                        # yes
    self.outputs = tolist(outputs)          # no
    self.updates = OrderedDict()            # no
    self.updates.update(model.updates)      # ???
    self.extension = extension              # no
    self.debug_print = debug_print          # no
    lr_scalers = OrderedDict()              # yes
    for node in self.model.nodes:           # should
        lr_scalers[node.name] = node.lr_scaler
    self.optimizer.lr_scalers = lr_scalers  # should
    self.nBernoulli = np.ones((n_steps,))   # yes
    t0 = time.time()
    self.cost_fn = self.build_training_graph()  # no, but should
    print "Elapsed compilation time: %f" % (time.time() - t0)
    if self.debug_print:                    # no
        from theano.printing import debugprint
        debugprint(self.cost_fn)
    if trainlog is None:                    # yes
        self.trainlog = TrainLog()
    else:
        self.trainlog = trainlog
    self.endloop = 0                        # no
    self.lr_iterations = lr_iterations      # yes
    self.lastBatchlastPoch = 0              # yes
    self.decay_schedule = decay_schedule    # yes
    self.k = k_speedOfconvergence           # yes
    self.schedRate = 1                      # yes
    self.n_steps = n_steps                  # yes
def __init__(self, nh, nc, ne, de, cs):
    '''
    nh :: dimension of the hidden layer
    nc :: number of classes
    ne :: number of word embeddings in the vocabulary
    de :: dimension of the word embeddings
    cs :: word window context size
    '''
    # parameters of the model
    self.emb = theano.shared(0.2 * numpy.random.uniform(-1.0, 1.0,
                             (ne + 1, de)).astype(theano.config.floatX))
    # add one for PADDING at the end
    self.Wx = theano.shared(0.2 * numpy.random.uniform(-1.0, 1.0,
                            (de * cs, nh)).astype(theano.config.floatX))
    self.Wh = theano.shared(0.2 * numpy.random.uniform(-1.0, 1.0,
                            (nh, nh)).astype(theano.config.floatX))
    self.W = theano.shared(0.2 * numpy.random.uniform(-1.0, 1.0,
                           (nh, nc)).astype(theano.config.floatX))
    self.bh = theano.shared(numpy.zeros(nh, dtype=theano.config.floatX))
    self.b = theano.shared(numpy.zeros(nc, dtype=theano.config.floatX))
    self.h0 = theano.shared(numpy.zeros(nh, dtype=theano.config.floatX))

    # bundle
    self.params = [self.emb, self.Wx, self.Wh, self.W,
                   self.bh, self.b, self.h0]
    self.names = ['embeddings', 'Wx', 'Wh', 'W', 'bh', 'b', 'h0']
    # as many columns as context window size,
    # as many lines as words in the sentence
    idxs = T.imatrix()
    x = self.emb[idxs].reshape((T.cast(idxs.shape[0], 'int32'), de * cs))
    y = T.iscalar('y')  # label

    def recurrence(x_t, h_tm1):
        h_t = T.nnet.sigmoid(T.dot(x_t, self.Wx)
                             + T.dot(h_tm1, self.Wh) + self.bh)
        s_t = T.nnet.softmax(T.dot(h_t, self.W) + self.b)
        return [h_t, s_t]

    [h, s], _ = theano.scan(fn=recurrence,
                            sequences=x,
                            outputs_info=[self.h0, None],
                            n_steps=T.cast(x.shape[0], 'int32'))

    p_y_given_x_lastword = s[-1, 0, :]
    p_y_given_x_sentence = s[:, 0, :]
    y_pred = T.argmax(p_y_given_x_sentence, axis=1)

    # cost and gradients and learning rate
    lr = T.fscalar('lr')
    nll = -T.log(p_y_given_x_lastword)[y]
    gradients = T.grad(nll, self.params)
    updates = OrderedDict((p, p - lr * g)
                          for p, g in zip(self.params, gradients))

    # theano functions
    self.classify = theano.function(inputs=[idxs], outputs=y_pred)
    self.train = theano.function(inputs=[idxs, y, lr],
                                 outputs=nll,
                                 updates=updates)
    debugprint(self.train)
    self.normalize = theano.function(
        inputs=[],
        updates={self.emb: self.emb /
                 T.sqrt((self.emb ** 2).sum(axis=1)).dimshuffle(0, 'x')})
def test_1d_basic(self):
    # this should be a softmax, but of a one-row matrix
    c = T.vector()
    p_y = T.exp(c) / T.exp(c).sum()

    # test that function contains softmax and no div.
    f = theano.function([c], p_y)
    printing.debugprint(f)

    # test that the gradient function contains softmax and no div.
    backup = config.warn.sum_div_dimshuffle_bug
    config.warn.sum_div_dimshuffle_bug = False
    try:
        g = theano.function([c], T.grad(p_y.sum(), c))
    finally:
        config.warn.sum_div_dimshuffle_bug = backup
    printing.debugprint(g)
    raise SkipTest('Optimization not enabled for the moment')
def restore(self, data, optimizer, cost, outputs, n_steps,
            debug_print=0, trainlog=None, extension=None,
            lr_iterations=None, decay_schedule=2,
            k_speedOfconvergence=40):
    self.data = data
    self.optimizer = optimizer
    self.inputs = self.model.inputs
    self.cost = cost
    self.outputs = tolist(outputs)
    #self.updates = OrderedDict()
    #self.updates.update(self.model.updates)
    self.updates = self.model.updates
    self.extension = extension
    self.debug_print = debug_print
    lr_scalers = OrderedDict()
    for node in self.model.nodes:
        lr_scalers[node.name] = node.lr_scaler
    self.optimizer.lr_scalers = lr_scalers
    self.nBernoulli = np.ones((n_steps,))
    t0 = time.time()
    self.cost_fn = self.build_training_graph()
    print "Elapsed compilation time: %f" % (time.time() - t0)
    if self.debug_print:
        from theano.printing import debugprint
        debugprint(self.cost_fn)
    if trainlog is None:
        self.trainlog = TrainLog()
    else:
        self.trainlog = trainlog
    self.endloop = 0
    self.lr_iterations = lr_iterations
    self.lastBatchlastPoch = 0
    self.decay_schedule = decay_schedule
    self.k = k_speedOfconvergence
    self.schedRate = 1
    self.n_steps = n_steps
def test_optimize_xent_vector(self):
    verbose = 0
    mode = theano.compile.mode.get_default_mode()
    if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
        mode = 'FAST_RUN'
    rng = numpy.random.RandomState(utt.fetch_seed())
    x_val = rng.randn(5).astype(config.floatX)
    y_val = numpy.asarray([2])
    x = T.vector('x')
    y = T.lvector('y')

    ## Test that an unbiased softmax is optimized correctly
    expressions = [
        T.sum(-T.log(softmax(x)[T.arange(y.shape[0]), y])),
        -T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y]))]

    for expr in expressions:
        f = theano.function([x, y], expr, mode=mode)
        if verbose:
            printing.debugprint(f)
        try:
            ops = [node.op for node in f.maker.fgraph.toposort()]
            assert len(ops) == 5
            assert crossentropy_softmax_argmax_1hot_with_bias in ops
            assert not [1 for o in ops
                        if isinstance(o, T.AdvancedSubtensor)]
            f(x_val, y_val)
        except Exception:
            theano.printing.debugprint(f)
            raise

        g = theano.function([x, y], T.grad(expr, x), mode=mode)
        if verbose:
            printing.debugprint(g)
        try:
            ops = [node.op for node in g.maker.fgraph.toposort()]
            assert len(ops) == 4
            assert crossentropy_softmax_1hot_with_bias_dx in ops
            assert softmax in ops
            assert softmax_grad not in ops
            g(x_val, y_val)
        except Exception:
            theano.printing.debugprint(g)
            raise
def test_grad(self):
    c = T.matrix()
    p_y = T.exp(c) / T.exp(c).sum(axis=1).dimshuffle(0, 'x')

    # test that function contains softmax and softmaxgrad
    w = T.matrix()
    backup = config.warn.sum_div_dimshuffle_bug
    config.warn.sum_div_dimshuffle_bug = False
    try:
        g = theano.function([c, w], T.grad((p_y * w).sum(), c))
    finally:
        config.warn.sum_div_dimshuffle_bug = backup
    g_ops = [n.op for n in g.maker.env.toposort()]
    print '--- g ='
    printing.debugprint(g)
    print '==='

    raise SkipTest('Optimization not enabled for the moment')
    assert len(g_ops) == 2
    assert softmax in g_ops
    assert softmax_grad in g_ops
    g(self.rng.rand(3, 4), self.rng.uniform(.5, 1, (3, 4)))
def _test_RNN2():
    print '\n------------------------------------------------------------'
    print 'Test: RNN layer 2'
    D = L.Input(2, name='DATA2')
    rnn2 = R.RNN(10, 1024, 10, name='RNN2')
    rnn2.set_function('activation', activation_sigmoid)
    rnn2.set_function('init', init_glorot_uniform)
    rnn2.set_function('inner_init', init_orthogonal)
    rnn2.set_input('input_single', D, 'output')
    D.build()
    rnn2.build()
    print 'Test mask 2:', D.input_mask(train=False)
    print 'Test output_last 2:'
    print P.debugprint(rnn2.get_output('output_last', train=False))
    print 'Test output_sequence 2:'
    print P.debugprint(rnn2.get_output('output_sequence', train=False))
    print 'Train mask 2:', D.input_mask(train=True)
    print 'Train output_last 2:'
    print P.debugprint(rnn2.get_output('output_last', train=True))
    print 'Train output_sequence 2:'
    print P.debugprint(rnn2.get_output('output_sequence', train=True))
def _test_RNN1():
    print '\n------------------------------------------------------------'
    print 'Test: RNN layer 1'
    X = L.Input(3, name='DATA3', mask_dim=2)
    rnn1 = R.RNN(3, 1024, 10, name='RNN1')
    rnn1.set_function('activation', activation_sigmoid)
    rnn1.set_function('init', init_glorot_uniform)
    rnn1.set_function('inner_init', init_orthogonal)
    rnn1.set_input('input_sequence', X, 'output')
    X.build()
    rnn1.build()
    print 'Test mask:', X.input_mask(train=False)
    print 'Test output_last:'
    print P.debugprint(rnn1.get_output('output_last', train=False))
    print 'Test output_sequence:'
    print P.debugprint(rnn1.get_output('output_sequence', train=False))
    print 'Train mask:', P.pprint(X.input_mask(train=True))
    print 'Train output_last:'
    print P.debugprint(rnn1.get_output('output_last', train=True))
    print 'Train output_sequence:'
    print P.debugprint(rnn1.get_output('output_sequence', train=True))
def test_debugprint():
    A = tensor.matrix(name='A')
    B = tensor.matrix(name='B')
    C = A + B
    C.name = 'C'
    D = tensor.matrix(name='D')
    E = tensor.matrix(name='E')
    F = D + E
    G = C + F
    mode = theano.compile.get_default_mode().including('fusion')
    g = theano.function([A, B, D, E], G, mode=mode)

    # just test that it works
    s = StringIO()
    debugprint(G, file=s)

    # test ids=int
    s = StringIO()
    debugprint(G, file=s, ids='int')
    s = s.getvalue()
    # The additional white space is needed!
    reference = '\n'.join([
        "Elemwise{add,no_inplace} [id 0] '' ",
        " |Elemwise{add,no_inplace} [id 1] 'C' ",
        " | |A [id 2]",
        " | |B [id 3]",
        " |Elemwise{add,no_inplace} [id 4] '' ",
        " |D [id 5]",
        " |E [id 6]",
    ]) + '\n'
    if s != reference:
        print('--' + s + '--')
        print('--' + reference + '--')
    assert s == reference

    # test ids=CHAR
    s = StringIO()
    debugprint(G, file=s, ids='CHAR')
    s = s.getvalue()
    reference = "\n".join([
        "Elemwise{add,no_inplace} [id A] '' ",
        " |Elemwise{add,no_inplace} [id B] 'C' ",
        " | |A [id C]",
        " | |B [id D]",
        " |Elemwise{add,no_inplace} [id E] '' ",
        " |D [id F]",
        " |E [id G]",
    ]) + '\n'
    if s != reference:
        print('--' + s + '--')
        print('--' + reference + '--')
    assert s == reference

    # test ids=CHAR, stop_on_name=True
    s = StringIO()
    debugprint(G, file=s, ids='CHAR', stop_on_name=True)
    s = s.getvalue()
    reference = '\n'.join([
        "Elemwise{add,no_inplace} [id A] '' ",
        " |Elemwise{add,no_inplace} [id B] 'C' ",
        " |Elemwise{add,no_inplace} [id C] '' ",
        " |D [id D]",
        " |E [id E]",
    ]) + '\n'
    if s != reference:
        print('--' + s + '--')
        print('--' + reference + '--')
    assert s == reference

    # test ids=''
    s = StringIO()
    debugprint(G, file=s, ids='')
    s = s.getvalue()
    reference = '\n'.join([
        "Elemwise{add,no_inplace} '' ",
        " |Elemwise{add,no_inplace} 'C' ",
        " | |A ",
        " | |B ",
        " |Elemwise{add,no_inplace} '' ",
        " |D ",
        " |E ",
    ]) + '\n'
    if s != reference:
        print('--' + s + '--')
        print('--' + reference + '--')
    assert s == reference

    # test print_storage=True
    s = StringIO()
    debugprint(g, file=s, ids='', print_storage=True)
    s = s.getvalue()
    reference = '\n'.join([
        "Elemwise{add,no_inplace} '' 0 [None]",
        " |A [None]",
        " |B [None]",
        " |D [None]",
        " |E [None]",
    ]) + '\n'
    if s != reference:
        print('--' + s + '--')
        print('--' + reference + '--')
    assert s == reference

    # test clients
    s = StringIO()
    # We must force the mode as otherwise it can change the clients order
    f = theano.function([A, B, D], [A + B, A + B - D], mode='FAST_COMPILE')
    debugprint(f, file=s, print_clients=True)
    s = s.getvalue()
    reference = '\n'.join([
        "Elemwise{add,no_inplace} [id A] '' 0 clients:[('output', ''), ('[id C]', 1)]",
        " |A [id D]",
        " |B [id E]",
        "Elemwise{sub,no_inplace} [id C] '' 1",
        " |Elemwise{add,no_inplace} [id A] '' 0 clients:[('output', ''), ('[id C]', 1)]",
        " |D [id F]",
    ]) + '\n'
    if s != reference:
        print('--' + s + '--')
        print('--' + reference + '--')
    assert s == reference
import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne.utils import compute_norms
from theano.printing import debugprint

param = theano.shared(np.random.randn(3, 5).astype(theano.config.floatX))
print "\nparam"
print param.get_value()

update = param + 100
print "\nupdate"
debugprint(update, print_type=True)

update = lasagne.updates.norm_constraint(update, 10)
print "\nnorm_constraint"
debugprint(update, print_type=True)

func = theano.function([], [], updates=[(param, update)])

# Apply constrained update
_ = func()

norms = compute_norms(param.get_value())

print "\nparam"
param_value = param.get_value()
# print compute_norms(param_value).shape
print param_value
import theano
from theano import tensor as T

x = T.vector('x')
W = T.matrix('W')
b = T.vector('b')

dot = T.dot(x, W)
out = T.nnet.sigmoid(dot + b)

from theano.printing import debugprint
debugprint(dot)
debugprint(out)

f = theano.function(inputs=[x, W], outputs=dot)
g = theano.function([x, W, b], out)
h = theano.function([x, W, b], [dot, out])
i = theano.function([x, W, b], [dot + b, out])

debugprint(f)
debugprint(g)

from theano.printing import pydotprint
# pydotprint requires a pydot package (e.g. pydot or pydot_ng) and
# graphviz to be installed; they do not need to be imported here.
pydotprint(f, outfile='pydotprint_f.png')
def save_debugprint(obj, **kargs):
    # Use `filename` to avoid shadowing the Python 2 builtin `file`
    filename = kargs.pop('file', 'debugprint.log')
    with open(filename, 'w') as file_:
        debugprint(obj, file=file_, **kargs)
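# A hedged usage sketch for save_debugprint above; the expression and file
# name are illustrative. Remaining keyword arguments are forwarded to
# debugprint unchanged.
import theano.tensor as T

v = T.vector('v')
cost = (v ** 2).sum()
save_debugprint(cost, file='cost_graph.log', print_type=True)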
def main():
    parser = argparse.ArgumentParser('description')
    parser.add_argument('--logfolder', '-l', help='Log folder.')
    parser.add_argument('--csvfolder', '-c',
                        help='Output CSV folder for graphs.')
    parser.add_argument('--output', '-o',
                        help='Folder for saving output models.')
    parser.add_argument('--model', '-m', help='Selects a particular model.')
    parser.add_argument('--maxbatches', '-B', default=0, type=int,
                        help='Maximum number of batches to process '
                             '(in thousands).')
    parser.add_argument('--batchsize', '-b', type=int, default=BATCH_SIZE,
                        help='Batch size')
    parser.add_argument('--dimensions', '-d', type=int, default=0,
                        help='Number of dimensions from the space to use. '
                             'If 0 (default), use all.')
    parser.add_argument('--learningrate', '-r', type=float,
                        default=LEARNING_RATE, help='Learning rate')
    args = parser.parse_args()

    logger.debug("Reading distributional space '%s'" % SPACE_FILENAME)
    space = load_numpy(SPACE_FILENAME, insertblank=True)
    if args.dimensions:
        space.matrix = space.matrix[:, :args.dimensions]
    if True:
        m = space.matrix
        norm_mean = m[1:].mean(axis=0)
        norm_std = (m[1:].std(axis=0) * 10)
        m = (m - norm_mean) / norm_std
        m[0] = 0
        space.matrix = m
    #space = space.normalize()
    logger.debug("Finished reading space")
    logger.debug("Space contains %d words with %d dimensions each."
                 % space.matrix.shape)

    cbr = CorpusBatchReader(CORPUS_FOLDER, space, batch_size=args.batchsize)
    data_iterator = DataIterator(cbr, epochs=1,
                                 maxbatches=args.maxbatches * 1000)

    HIDDEN = space.matrix.shape[1]

    logger.debug("Compiling compute graph")
    R = data_iterator.test[0].shape[1]
    model = models.get_model(args.model, space, R, HIDDEN, args.learningrate)
    modelinfo = {
        'model': args.model,
        'learningrate': args.learningrate,
        'hidden': HIDDEN,
        'space': SPACE_FILENAME,
        'dimensions': space.matrix.shape[1],
    }
    filename = _generate_filename(modelinfo)
    csvlog = CSVLogger(os.path.join(args.csvfolder, filename + ".csv"))
    logger.debug("Compilation finished")

    if DEBUG:
        logger.debug("Theano compute graph:\n" +
                     debugprint(model._train.maker.fgraph.outputs[0],
                                file='str'))

    logger.debug("Starting training")
    start_time = datetime.now()
    for X, Y in data_iterator:
        trainscore = model.train_on_batch(X, Y)
        if data_iterator.batch % 1000 == 0:
            valscore = model.evaluate(*data_iterator.val, verbose=False)
            testscore = model.evaluate(*data_iterator.test, verbose=False)
            progress = data_iterator.progress()
            elapsed = (datetime.now() - start_time)
            rank = intrinsic_eval(model, space,
                                  data_iterator.test[0],
                                  data_iterator.test[1])
            #rank = 0.0
            eta = _compute_eta(start_time, progress)
            batchinfo = dict(
                epoch=data_iterator.epoch,
                kbatch=data_iterator.batch / 1000,
                trainscore=trainscore,
                valscore=valscore,
                testscore=testscore,
                intrinsic=rank,
                progress=100 * progress,
                elapsed=elapsed.total_seconds(),
                eta=eta,
            )
            info = _dictmerge(batchinfo, modelinfo)
            logger.debug("%(epoch)3d ep %(kbatch)8d Kba %(intrinsic)6.4f / "
                         "%(valscore)8.5f / %(testscore)8.5f "
                         "[%(progress)5.1f%% eta %(eta)s]" % info)
            del info['eta']
            csvlog.append(info)
        if data_iterator.batch % 5000 == 0:
            checkpoint_filename = os.path.join(
                args.output,
                "%s__batch%08d.hd5" % (filename, data_iterator.batch))
            logger.debug("Checkpointing model to %s" % checkpoint_filename)
            model.save_weights(checkpoint_filename, overwrite=True)
x = T.dscalar('x')
y = x ** 2

print("gy is the derivative of y with respect to x")
gy = T.grad(y, x)

print("Show gy before compilation and optimization")
print("pp(gy) = %s\n" % pp(gy))
# '((fill((x ** 2), 1.0) * 2) * (x ** (2 - 1)))'
print("fill(x ** 2, 1.0) is a tensor of the same shape as x ** 2 "
      "(a scalar here) with every component equal to 1.0")
print("So this is 1 * 2 * (x ** (2 - 1)), i.e. 2*x.")

print("f is gy compiled and optimized; its debugprint shows "
      "that it has become 2*x.")
f = function([x], gy)
print(debugprint(f))

print("Pretty-printing f.maker.fgraph.outputs[0] also shows this.")
print("pp(f.maker.fgraph.outputs[0]) = %s" % pp(f.maker.fgraph.outputs[0]))

print("f(4) = %f" % f(4))        # array(8.0)
print("f(94.2) = %f" % f(94.2))  # array(188.40000000000001)

print("x is a double-precision matrix")
x = T.dmatrix('x')
print("pp(x) = %s\n" % pp(x))

print("s is the logistic function of x")
layer.build()

"""input, output"""
# input of model
X_train = data.input(train=True)
X_test = data.input(train=False)

# output of model
y_train = output.output(train=True)
y_test = output.output(train=False)
mask_train = output.output_mask(train=True)   # None in this example
mask_test = output.output_mask(train=False)   # None in this example

print("X_train:", P.pprint(X_train))
print("X_test:", P.pprint(X_test))
print("y_train:")
print(P.debugprint(y_train))
print("y_test:")
print(P.debugprint(y_test))

"""loss"""
loss = objectives.get("categorical_crossentropy")
weighted_loss = models.weighted_objective(loss)
y = K.placeholder(ndim=K.ndim(y_train))
weights = K.placeholder(ndim=1)
train_loss = weighted_loss(y, y_train, weights, mask_train)
test_loss = weighted_loss(y, y_test, weights, mask_test)

_y_train = K.placeholder(ndim=3, name="y_train")
_y_test = K.placeholder(ndim=3, name="y_test")
import theano
from theano import printing
import theano.tensor as T
import numpy as np

print(T.scalar())
print(T.iscalar())
print(T.fscalar())
print(T.dscalar())

x = T.matrix('x')
y = T.matrix('y')
z = x + y
print(z)
print(theano.pprint(z))
print(printing.debugprint(z))
print(theano.pp(z))
print(z.eval({x: [[1, 2], [1, 3]], y: [[1, 0], [3, 4]]}))

addition = theano.function([x, y], [z])
print(addition([[1, 2], [1, 3]], [[1, 0], [3, 4]]))
print(printing.debugprint(addition))
print(addition(np.ones((2, 2), dtype=theano.config.floatX),
               np.zeros((2, 2), dtype=theano.config.floatX)))

a = T.zeros((2, 3))
print(a.eval())
b = T.identity_like(a)
print(b.eval())
c = T.arange(10)
print(c.eval())
print(c.ndim)
print(c.dtype)
def test_debugprint():
    A = tensor.matrix(name='A')
    B = tensor.matrix(name='B')
    C = A + B
    C.name = 'C'
    D = tensor.matrix(name='D')
    E = tensor.matrix(name='E')
    F = D + E
    G = C + F

    # just test that it works
    debugprint(G)

    # test ids=int
    s = StringIO()
    debugprint(G, file=s, ids='int')
    s = s.getvalue()
    # The additional white space is needed!
    reference = '\n'.join([
        "Elemwise{add,no_inplace} [@0] '' ",
        " |Elemwise{add,no_inplace} [@1] 'C' ",
        " | |A [@2]",
        " | |B [@3]",
        " |Elemwise{add,no_inplace} [@4] '' ",
        " |D [@5]",
        " |E [@6]",
    ]) + '\n'
    if s != reference:
        print('--' + s + '--')
        print('--' + reference + '--')
    assert s == reference

    # test ids=CHAR
    s = StringIO()
    debugprint(G, file=s, ids='CHAR')
    s = s.getvalue()
    reference = "\n".join([
        "Elemwise{add,no_inplace} [@A] '' ",
        " |Elemwise{add,no_inplace} [@B] 'C' ",
        " | |A [@C]",
        " | |B [@D]",
        " |Elemwise{add,no_inplace} [@E] '' ",
        " |D [@F]",
        " |E [@G]",
    ]) + '\n'
    if s != reference:
        print('--' + s + '--')
        print('--' + reference + '--')
    assert s == reference

    # test ids=CHAR, stop_on_name=True
    s = StringIO()
    debugprint(G, file=s, ids='CHAR', stop_on_name=True)
    s = s.getvalue()
    reference = '\n'.join([
        "Elemwise{add,no_inplace} [@A] '' ",
        " |Elemwise{add,no_inplace} [@B] 'C' ",
        " |Elemwise{add,no_inplace} [@C] '' ",
        " |D [@D]",
        " |E [@E]",
    ]) + '\n'
    if s != reference:
        print('--' + s + '--')
        print('--' + reference + '--')
    assert s == reference

    # test ids=''
    s = StringIO()
    debugprint(G, file=s, ids='')
    s = s.getvalue()
    reference = '\n'.join([
        "Elemwise{add,no_inplace} '' ",
        " |Elemwise{add,no_inplace} 'C' ",
        " | |A ",
        " | |B ",
        " |Elemwise{add,no_inplace} '' ",
        " |D ",
        " |E ",
    ]) + '\n'
    if s != reference:
        print('--' + s + '--')
        print('--' + reference + '--')
    assert s == reference
gc.collect()
gc.collect()
gc.collect()
#after_after = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray.mem_info()
#diff = after_after[0] - after[0]
#if diff > expected_diff:
#    print "grad uses ", str(float(diff)/float(expected_diff)), " times more memory than needed."

updates = {grad: W}
f = function([], updates=updates)

from theano.printing import debugprint
debugprint(f)

print 'call'
#before = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray.mem_info()
f()
gc.collect()
gc.collect()
gc.collect()
#after = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray.mem_info()
print 'references to initial array: ', sys.getrefcount(init_array)
print "ALL DEALLOCS AFTER HERE ARE TOO LATE"
print '--------------------------------'
input_symbol = T.matrix(dtype=theano.config.floatX)
network = lasagne.layers.InputLayer((batch_size, input_size))
value_stream = lasagne.layers.DenseLayer(network,
                                         num_units=value_size,
                                         nonlinearity=None,
                                         W=lasagne.init.HeUniform(),
                                         b=lasagne.init.Constant(0.1))
advantage_stream = lasagne.layers.DenseLayer(network,
                                             num_units=advantage_size,
                                             nonlinearity=None,
                                             W=lasagne.init.HeUniform(),
                                             b=lasagne.init.Constant(0.1))
duel = DuelAggregateLayer([value_stream, advantage_stream])

print "\nvalue_stream"
print lasagne.layers.get_output_shape(value_stream)
debugprint(lasagne.layers.get_output(value_stream), print_type=True)

print "\nadvantage_stream"
print lasagne.layers.get_output_shape(advantage_stream)
debugprint(lasagne.layers.get_output(advantage_stream), print_type=True)

print "\nduel"
print lasagne.layers.get_output_shape(duel)
debugprint(lasagne.layers.get_output(duel), print_type=True)

f = theano.function([input_symbol],
                    lasagne.layers.get_output(duel, input_symbol))
g = theano.function([input_symbol],
                    lasagne.layers.get_output(value_stream, input_symbol))
h = theano.function([input_symbol],
                    lasagne.layers.get_output(advantage_stream, input_symbol))

print "\nout"
input_value = rng.rand(batch_size, input_size).astype(np.float32)
import numpy as np
import theano
import theano.tensor as T
from theano.printing import debugprint

a = T.matrix(dtype=theano.config.floatX, name='a')
c = T.iscalar(name='c')
d = c.astype(theano.config.floatX)
b = a * (T.ones_like(d) - d)
# debugprint(b)
f = theano.function([a, c], b)
debugprint(f, print_type=True)
# print theano.pp(f.maker.fgraph.outputs[0])

x = np.array([[5.0, 3.0], [2.5, -1.0]], dtype=np.float32)
y = 0
print x, y
print f(x, y)