def test_nested():
    """Nested ``ifelse`` graphs: an eager VM must hit the NotImplementedOp
    branch and raise, while a lazy VM skips it and returns the result."""
    missing_op = NotImplementedOp()
    multi_ifelse = IfElseIfElseIf()

    x1 = tt.scalar("x1")
    x2 = tt.scalar("x2")
    c1 = tt.scalar("c1")
    c2 = tt.scalar("c2")

    t1 = ifelse(c1, x1, missing_op(x2))
    t1.name = "t1"
    t2 = t1 * 10
    t2.name = "t2"
    t3 = ifelse(c2, t2, x1 + t1)
    t3.name = "t3"
    t4 = multi_ifelse(tt.eq(x1, x2), x1, tt.eq(x1, 5), x2, c2, t3, t3 + 0.5)
    t4.name = "t4"

    # With lazy=False every node is computed, so the NotImplementedOp fires.
    eager_fn = function(
        [c1, c2, x1, x2],
        t4,
        mode=Mode(linker=theano.link.vm.VMLinker(lazy=False), optimizer="fast_run"),
    )
    with pytest.raises(NotImplementedOpException):
        eager_fn(1, 0, np.array(10, dtype=x1.dtype), 0)

    # With lazy=True the untaken branch is never evaluated.
    lazy_fn = function(
        [c1, c2, x1, x2],
        t4,
        mode=Mode(linker=theano.link.vm.VMLinker(lazy=True), optimizer="fast_run"),
    )
    assert lazy_fn(1, 0, np.array(10, dtype=x1.dtype), 0) == 20.5
def more_complex_test():
    """Chained ifelse graph; the NotImplementedOp branch is only safe to
    compile when the VM evaluates lazily."""
    broken_op = NotImplementedOp()
    chained_cond = IfElseIfElseIf()

    x1 = T.scalar('x1')
    x2 = T.scalar('x2')
    c1 = T.scalar('c1')
    c2 = T.scalar('c2')

    t1 = ifelse(c1, x1, broken_op(x2))
    t2 = t1 * 10
    t3 = ifelse(c2, t2, x1 + t1)
    t4 = chained_cond(T.eq(x1, x2), x1, T.eq(x1, 5), x2, c2, t3, t3 + 0.5)
    for var, label in zip((t1, t2, t3, t4), ('t1', 't2', 't3', 't4')):
        var.name = label

    compiled = function([c1, c2, x1, x2], t4,
                        mode=Mode(linker='vm', optimizer='fast_run'))

    if theano.config.vm.lazy is False:
        # Eager evaluation reaches the NotImplementedOp and must raise.
        try:
            compiled(1, 0, numpy.array(10, dtype=x1.dtype), 0)
            assert False
        except NotImplementedOp.E:
            pass
    else:
        print(compiled(1, 0, numpy.array(10, dtype=x1.dtype), 0))
        assert compiled(1, 0, numpy.array(10, dtype=x1.dtype), 0) == 20.5
        print('... passed')
def test_ifelse():
    """The branch of a lazy ifelse that is not selected must never run."""
    cond = tt.scalar()
    then_in = generic()
    else_in = generic()
    failing_op = NotImplementedOp()

    # We need lazy to end up being True for this test.
    lazy_options = [True, None] if theano.config.vm__lazy in [True, None] else [True]
    cloop_options = [False] if theano.config.cxx == "" else [True, False]

    for use_cloop in cloop_options:
        for lazy_flag in lazy_options:
            vm_linker = theano.link.vm.VMLinker(use_cloop=use_cloop, lazy=lazy_flag)
            fn = function(
                [cond, then_in, else_in],
                ifelse(cond, failing_op(then_in), else_in),
                mode=Mode(linker=vm_linker, optimizer="fast_run"),
            )
            # Selecting the NotImplementedOp branch must raise ...
            with pytest.raises(NotImplementedOpException):
                fn(1, "a", "b")
            # ... while selecting the other branch never touches it.
            assert fn(0, "a", "b") == "b"
def test_ifelse():
    """Lazy ifelse must skip the NotImplementedOp branch when not selected."""
    switch = T.scalar()
    true_val = generic()
    false_val = generic()
    failing = NotImplementedOp()

    # We need lazy to end up being True for this test.
    lazy_values = [True, None] if theano.config.vm.lazy in [True, None] else [True]
    cloop_values = [False] if theano.config.cxx == "" else [True, False]

    for use_cloop in cloop_values:
        for lazy in lazy_values:
            vm = theano.gof.vm.VM_Linker(use_cloop=use_cloop, lazy=lazy)
            fn = function([switch, true_val, false_val],
                          ifelse(switch, failing(true_val), false_val),
                          mode=Mode(linker=vm, optimizer='fast_run'))
            # Selecting the NotImplementedOp branch must raise.
            try:
                fn(1, 'a', 'b')
                assert False
            except NotImplementedOp.E:
                pass
            # Selecting the other branch must succeed without touching it.
            assert fn(0, 'a', 'b') == 'b'
def more_complex_test():
    """Chained ifelse graph; only a lazy VM can evaluate it successfully."""
    broken_op = NotImplementedOp()
    chained_cond = IfElseIfElseIf()

    x1 = T.scalar("x1")
    x2 = T.scalar("x2")
    c1 = T.scalar("c1")
    c2 = T.scalar("c2")

    t1 = ifelse(c1, x1, broken_op(x2))
    t2 = t1 * 10
    t3 = ifelse(c2, t2, x1 + t1)
    t4 = chained_cond(T.eq(x1, x2), x1, T.eq(x1, 5), x2, c2, t3, t3 + 0.5)
    for var, label in zip((t1, t2, t3, t4), ("t1", "t2", "t3", "t4")):
        var.name = label

    compiled = function([c1, c2, x1, x2], t4,
                        mode=Mode(linker="vm", optimizer="fast_run"))

    if theano.config.vm.lazy is False:
        # Eager evaluation reaches the NotImplementedOp and must raise.
        try:
            compiled(1, 0, np.array(10, dtype=x1.dtype), 0)
            assert False
        except NotImplementedOp.E:
            pass
    else:
        print(compiled(1, 0, np.array(10, dtype=x1.dtype), 0))
        assert compiled(1, 0, np.array(10, dtype=x1.dtype), 0) == 20.5
        print("... passed")
def test_ifelse():
    """Check that the untaken ifelse branch is skipped by the lazy linker.

    ``notimpl(b)`` raises ``NotImplementedOp.E`` whenever it is actually
    evaluated, so the call with condition 0 only succeeds if the 'vm'
    linker evaluates the ``ifelse`` lazily.
    """
    a = T.scalar()
    b = generic()
    c = generic()
    notimpl = NotImplementedOp()

    f = function([a, b, c], ifelse(a, notimpl(b), c),
                 mode=Mode(linker='vm', optimizer='fast_run'))

    # Condition true: the NotImplementedOp branch is selected and must raise.
    try:
        print("case 1")
        f(1, 'a', 'b')
        assert False
    except NotImplementedOp.E:
        pass
    print("... passed")

    # Condition false: the failing branch must be skipped entirely.
    print("case 2")
    print(f(0, 'a', 'b'))
    assert f(0, 'a', 'b') == 'b'
    print("... passed")
def exec_multilayer_conv_nnet_old(conv_mode, ss, bsize, imshp, kshps, nkerns,
                                  unroll_batch=0, unroll_kern=0, img=T.dmatrix(),
                                  validate=True, conv_op_py=False, do_print=True,
                                  repeat=1, unroll_patch=False,
                                  unroll_patch_size=False, verbose=0):
    """Benchmark a stack of ConvOp layers against a manual scipy convolution.

    For each layer described by ``kshps``/``nkerns`` this builds a ConvOp,
    compiles it with the default linker (``propup2``) and with the pure-Python
    linker (``propup3``), times ``repeat`` executions of each, and — when
    ``validate`` is true — checks both against a triple-loop scipy
    ``_convolve2d`` reference.

    Returns the tuple ``(tctot, tpytot, ntot)``: accumulated seconds spent in
    the compiled ConvOp, the python-mode ConvOp, and the manual reference.

    NOTE(review): ``img`` has a mutable/graph-object default (``T.dmatrix()``)
    and is never used in the body; presumably a leftover parameter — confirm
    before removing.
    """
    # build actual input images
    imgval = global_rng.rand(bsize, imshp[0], imshp[1], imshp[2])
    a = T.dmatrix()
    # One symbolic kernel placeholder per layer (all share the same variable).
    kerns = [a for i in nkerns]
    inputs4 = dmatrix4()
    kerns4 = dmatrix4()
    # for each layer
    ntot = 0    # accumulated time of the manual scipy reference
    tctot = 0   # accumulated time of the compiled ConvOp
    tpytot = 0  # accumulated time of the python-mode ConvOp
    for kshp, kern, nkern, n_layer in zip(kshps, kerns, nkerns,
                                          range(len(nkerns))):
        if do_print:
            print '************* layer %i ***************' % n_layer
            print conv_mode, ss, n_layer, kshp, nkern
        # actual values
        w = global_rng.random_sample(N.r_[nkern, imshp[0], kshp])
        # Flip the kernel so correlation-style indexing matches convolution.
        w_flip = flip(w, kshp).reshape(w.shape)

        # manual implementation: check first stage.
        padimg = imgval
        if conv_mode == 'full':
            # Zero-pad the image so 'full' convolution keeps every overlap.
            padimg_shp = N.array(
                imshp[1:]) + 2 * (N.array(kshp) - N.array([1, 1]))
            padimg = N.zeros(N.r_[bsize, imshp[0], padimg_shp])
            padimg[:, :, kshp[0] - 1:-kshp[0] + 1,
                   kshp[1] - 1:-kshp[1] + 1] = imgval
        outshp = N.hstack(
            (nkern, ConvOp.getOutputShape(imshp[1:], kshp, ss, conv_mode)))
        time1 = time.time()
        outval = N.zeros(N.r_[bsize, outshp])
        if validate:
            # causes an atexit problem
            # NOTE(review): these are private scipy APIs; they may not exist
            # in current scipy releases — verify against the installed version.
            from scipy.signal.sigtools import _convolve2d
            from scipy.signal.signaltools import _valfrommode, _bvalfromboundary
            val = _valfrommode(conv_mode)
            bval = _bvalfromboundary('fill')
            for b in range(bsize):  # loop over batches
                for n in range(nkern):  # loop over filters
                    for i in range(imshp[0]):  # loop over input feature maps
                        # Accumulate each input map's contribution, then
                        # subsample by the stride ss.
                        outval[b, n, ...] += _convolve2d(
                            imgval[b, i, ...], w_flip[n, i, ...], 1, val,
                            bval, 0)[0::ss[0], 0::ss[1]]
        ntot += time.time() - time1

        # ConvOp
        if unroll_patch and not unroll_patch_size:
            # Patch-unrolled ConvOp infers shapes at runtime.
            conv_op = ConvOp(dx=ss[0], dy=ss[1], output_mode=conv_mode,
                             unroll_patch=unroll_patch,
                             verbose=verbose)(inputs4, kerns4)
        else:
            # Fully-specified ConvOp with compile-time shapes and unrolling.
            conv_op = ConvOp(imshp, kshp, nkern, bsize, ss[0], ss[1],
                             conv_mode, unroll_batch=unroll_batch,
                             unroll_kern=unroll_kern,
                             unroll_patch=unroll_patch,
                             verbose=verbose)(inputs4, kerns4)
        # NOTE(review): l1shp is computed but never used below.
        l1shp = N.hstack(
            (nkern, ConvOp.getOutputShape(imshp[1:], kshp, ss, conv_mode)))
        propup2 = function([inputs4, kerns4], conv_op)
        propup3 = function([inputs4, kerns4], conv_op, mode=Mode(linker="py"))

        time1 = time.time()
        for i in range(repeat):
            hidval2_ = propup2(imgval, w_flip)
            hidval2 = hidval2_  #[:,:,0::ss[0],0::ss[1]]
        tctot += time.time() - time1

        if conv_op_py:
            time1 = time.time()
            for i in range(repeat):
                hidval3_ = propup3(imgval, w_flip)
                hidval3 = hidval3_  #[:,:,0::ss[0],0::ss[1]]
            tpytot += time.time() - time1
            # Compiled and python-mode ConvOp must agree.
            assert (N.abs(hidval2 - hidval3) < 1e-5).all()
        else:
            tpytot += 0

        if validate:
            # Compiled ConvOp must match the manual scipy reference.
            temp = N.abs(outval - hidval2)
            assert (temp < 1e-5).all()
        if validate and conv_op_py:
            temp = N.abs(outval - hidval3)
            assert (temp < 1e-5).all()

        # This layer's output becomes the next layer's input.
        imshp = tuple(outshp)
        imgval = outval.reshape(bsize, outshp[0], outshp[1], outshp[2])
    return tctot, tpytot, ntot
import numpy as np
from theano import function, Mode
from theano import tensor as T
from theano import config

# Debugging snippet: build a small elementwise graph in which intermediate
# `c` is reused by several consumers, then compile it with the optimizer
# disabled and run it once on a vector of ones.
a = T.vector()
b = T.log(a)
c = T.nnet.sigmoid(b)
d = T.sqrt(c)
# e has length 2n (two length-n vectors concatenated) while f has length n,
# so `g = e + f` mixes mismatched lengths.
e = T.concatenate((d, c), axis=0)
f = b * c * d  # This is the first bad line
g = e + f
h = g / c
# optimizer='None' keeps the graph unoptimized so every node above is
# executed exactly as written.
fn = function([a], h, mode=Mode(optimizer='None'))
fn(np.ones((3, )).astype(a.dtype))
# Two equivalent ways of compiling a lookup of one row of lp_table by a
# symbolic index; both calls should print the same value for index 0.
tag_lp = lp_table[tag_idx]
tf_get_tag_lp1 = function([tag_idx], tag_lp)
print tf_get_tag_lp1(0)
tf_get_tag_lp2 = function([tag_idx], lp_table[tag_idx])
print tf_get_tag_lp2(0)

"is ifelse lazy evaluated ?"
a = T.scalar('a')


def g(a):
    # Symbolic sum of arange(a); compare with sum(range(a))
    return T.sum(T.arange(a))


def f(a):
    # Same symbolic computation as g; compare with sum(range(a))
    return T.sum(T.arange(a))


# T.gt(1, 1) is constant-false, so only the second branch's value is needed:
# switch computes both branches, while ifelse should lazily compute just one.
f1 = function([a], T.switch(T.gt(1, 1), f(a), g(a + 1)),
              mode=Mode(linker='cvm'), on_unused_input='ignore')
g1 = function([a], ifelse(T.gt(1, 1), f(a), g(a + 1)),
              mode=Mode(linker='cvm'), on_unused_input='ignore')

# Time the eager (switch) vs lazy (ifelse) compiled functions.
timeit.timeit('f1(100000)', "from __main__ import f1", number=10000)
timeit.timeit('g1(100000)', "from __main__ import g1", number=10000)
# You must ensure that only theano ops are in the graph. Nothing else that
# might force an actual compilation before hand and destroy the laziness.

"Problem with gradient computation in Theano with ifelse"
import theano
from theano import function, tensor
from theano.ifelse import ifelse

i = tensor.iscalar('i')
# Vector of i elements, each equal to i
a = tensor.alloc(i.astype('float64'), i)
m = a.max()
def jobman(_options, channel=None):
    """Train, validate and test an RNN on the spike_numbers task.

    Builds the recurrent model with ``theano.scan``, a norm-preservation
    regularizer on the backward Jacobian products, and (optionally) a
    closed-form Wiener-Hopf solve for the output weights; then runs the
    training loop with early stopping, periodically pickling results.

    Parameters
    ----------
    _options : dict-like
        Raw experiment options; parsed into ``o`` by parse_input_arguments.
    channel : optional
        Jobman channel; when given, progress is written back via
        ``channel.save()``.

    NOTE(review): the reconstruction of this routine's exact indentation is
    best-effort; statement nesting in the main loop should be checked
    against the original repository.
    """
    ################### PARSE INPUT ARGUMENTS #######################
    o = parse_input_arguments(_options,
                              'RNN_theano/rnn_stream001/RNN_stream.ini')
    ####################### DEFINE THE TASK #########################
    mode = Mode(linker='cvm', optimizer='fast_run')
    rng = numpy.random.RandomState(o['seed'])
    # Separate data generators for train / valid / test (and the optional
    # Wiener-Hopf set); only the training set carries noise.
    train_set = spike_numbers(n_outs=o['n_outs'],
                              T=o['task_T'],
                              inrange=o['task_inrange'],
                              max_val=o['task_max_val'],
                              min_val=o['task_min_val'],
                              batches=o['task_train_batches'],
                              batch_size=o['task_train_batchsize'],
                              noise=o['task_noise'],
                              rng=rng)
    valid_set = spike_numbers(n_outs=o['n_outs'],
                              T=o['task_T'],
                              inrange=o['task_inrange'],
                              max_val=o['task_max_val'],
                              min_val=o['task_min_val'],
                              batches=o['task_valid_batches'],
                              batch_size=o['task_valid_batchsize'],
                              rng=rng)
    test_set = spike_numbers(n_outs=o['n_outs'],
                             T=o['task_T'],
                             inrange=o['task_inrange'],
                             max_val=o['task_max_val'],
                             min_val=o['task_min_val'],
                             batches=o['task_test_batches'],
                             batch_size=o['task_test_batchsize'],
                             rng=rng)
    if o['wout_pinv']:
        wout_set = spike_numbers(n_outs=o['n_outs'],
                                 T=o['task_T'],
                                 inrange=o['task_inrange'],
                                 max_val=o['task_max_val'],
                                 min_val=o['task_min_val'],
                                 batches=o['task_wout_batches'],
                                 batch_size=o['task_wout_batchsize'],
                                 noise=o['task_wout_noise'],
                                 rng=rng)

    ###################### DEFINE THE MODEL #########################
    def recurrent_fn(u_t, h_tm1, W_hh, W_ux, W_hy, b):
        # One RNN step: project input, update tanh hidden state, read out.
        x_t = TT.dot(W_ux, u_t)
        h_t = TT.tanh(TT.dot(W_hh, h_tm1) + x_t + b)
        y_t = TT.dot(W_hy, h_t)
        return h_t, y_t

    u = TT.tensor3('u')
    if o['error_over_all']:
        t = TT.tensor3('t')
    else:
        t = TT.matrix('t')
    h0 = TT.matrix('h0')
    b = shared_shape(
        floatX(
            numpy.random.uniform(size=(o['nhid'], ),
                                 low=-o['Wux_properties']['scale'],
                                 high=o['Wux_properties']['scale'])))
    alpha = TT.scalar('alpha')  # regularizer weight
    lr = TT.scalar('lr')        # learning rate
    W_hh = init(o['nhid'], o['nhid'], 'W_hh', o['Whh_style'],
                o['Whh_properties'], rng)
    W_ux = init(o['nhid'], train_set.n_ins, 'W_ux', o['Wux_style'],
                o['Wux_properties'], rng)
    W_hy = init(o['n_outs'], o['nhid'], 'W_hy', o['Why_style'],
                o['Why_properties'], rng)
    [h, y], _ = theano.scan(recurrent_fn,
                            sequences=u,
                            outputs_info=[h0, None],
                            non_sequences=[W_hh, W_ux, W_hy,
                                           TT.shape_padright(b)],
                            name='recurrent_fn',
                            mode=mode)
    # Grab scan's internal initial-state variable so we can differentiate
    # through the very first hidden state as well.
    init_h = h.owner.inputs[0].owner.inputs[2]
    #h = theano.printing.Print('h',attrs=('shape',))(h)
    if o['error_over_all']:
        # Mean squared error over every time step.
        out_err = TT.mean(TT.mean((y - t)**2, axis=0), axis=1)
        err = out_err.mean()
    else:
        # Error on the final time step only.
        out_err = ((y[-1] - t)**2).mean(axis=1)
        err = out_err.mean()

    # Regularization term
    if o['reg_projection'] == 'h[-1]':
        cost = h[-1].sum()
    elif o['reg_projection'] == 'err':
        cost = err
    elif o['reg_projection'] == 'random':
        # Project the final hidden state onto a random direction.
        trng = TT.shared_randomstreams.RandomStreams(rng.randint(1e6))
        proj = trng.uniform(size=h[-1].shape)
        if o['sum_h2'] > 0:
            proj = TT.join(0, proj[:o['sum_h2']],
                           TT.zeros_like(proj[o['sum_h2']:]))
        cost = TT.sum(proj * h[-1])
    # Gradients through time: z holds the back-propagated signal per step.
    z, gh = TT.grad(cost, [init_h, h])
    z.name = '__z__'
    z = z[:-1] - gh
    if o['sum_h'] > 0:
        z2 = TT.sum(z[:, :o['sum_h']]**2, axis=1)
    else:
        z2 = TT.sum(z**2, axis=1)
    # Ratios of consecutive gradient norms; guarded against division by ~0.
    v1 = z2[:-1]
    v2 = z2[1:]
    ##
    ## v2 = theano.printing.Print('v2')(v2)
    # floatX(1e-14)
    ratios = TT.switch(TT.ge(v2, 1e-12), TT.sqrt(v1 / v2), floatX(1))
    norm_0 = TT.ones_like(ratios[0])
    # Cumulative products of the ratios ~ Jacobian norm products over time.
    norm_t, _ = theano.scan(lambda x, y: x * y,
                            sequences=ratios,
                            outputs_info=norm_0,
                            name='jacobian_products',
                            mode=mode)
    norm_term = TT.sum(TT.mean(norm_t, axis=1))
    if o['reg_cost'] == 'product':
        r = TT.mean(abs(TT.log(norm_t)), axis=1).sum()
    elif o['reg_cost'] == 'each':
        r = TT.mean(abs(TT.log(ratios)), axis=1).sum()
    elif o['reg_cost'] == 'product2':
        # Ratios measured against the last step instead of stepwise.
        ratios2 = TT.switch(TT.ge(z2[-1], 1e-12), TT.sqrt(z2 / z2[-1]),
                            floatX(1))
        r = TT.mean(abs(TT.log(ratios2)), axis=1).sum()
        ratios = TT.switch(TT.ge(v2, 1e-12), TT.sqrt(v1 / v2),
                           floatX(1e-12))[::-1]
        norm_0 = TT.ones_like(ratios[0])
        norm_t, _ = theano.scan(lambda x, y: x * y,
                                sequences=ratios,
                                outputs_info=norm_0,
                                name='jacobian_products',
                                mode=mode)
        norm_term = floatX(0.1) + TT.sum(TT.mean(norm_t, axis=1))
    # Gradient of the final output w.r.t. the input sequence (for analysis).
    gu = TT.grad(y[-1].sum(), u)

    # Pick the update rule; both return (updates, extras) where extras[2]
    # holds per-parameter bookkeeping terms.
    if o['opt_alg'] == 'sgd':
        get_updates = lambda p, e, up: (sgd(
            p, e, lr=lr, scale=my1 / norm_term, updates=up)[0],
            [[], [], [TT.constant(0) for x in p]])
    elif o['opt_alg'] == 'sgd_qn':
        get_updates = lambda p, e, up: sgd_qn(p,
                                              e,
                                              mylambda=floatX(o['mylambda']),
                                              t0=floatX(o['t0']),
                                              skip=floatX(o['skip']),
                                              scale=my1 / norm_term,
                                              lazy=o['lazy'],
                                              updates=up)
    if o['win_reg']:
        # Output weights trained on plain error; the rest on err + alpha*r.
        updates, why_extra = get_updates([W_hy], err, {})
        cost = err + alpha * r
        updates, extras = get_updates([W_ux, W_hh, b], cost, updates)
        b_Why = why_extra[2][0]
        b_Wux = extras[2][0]
        b_Whh = extras[2][1]
        b_b = extras[2][2]
    else:
        updates, extras1 = get_updates([W_hy, W_ux], err, {})
        cost = err + alpha * r
        updates, extras2 = get_updates([W_hh, b], cost, updates)
        b_Why = extras1[2][0]
        b_Wux = extras1[2][1]
        b_Whh = extras2[2][0]
        b_b = extras2[2][1]

    # Per-split zero initial hidden states, sized by each split's batch size.
    nhid = o['nhid']
    train_batchsize = o['task_train_batchsize']
    valid_batchsize = o['task_valid_batchsize']
    test_batchsize = o['task_test_batchsize']
    wout_batchsize = o['task_wout_batchsize']
    train_h0 = shared_shape(floatX(numpy.zeros((nhid, train_batchsize))))
    valid_h0 = shared_shape(floatX(numpy.zeros((nhid, valid_batchsize))))
    test_h0 = shared_shape(floatX(numpy.zeros((nhid, test_batchsize))))
    wout_h0 = shared_shape(floatX(numpy.zeros((nhid, wout_batchsize))))
    idx = TT.iscalar('idx')

    # Compile the train/valid/test functions, copying the shape hints from
    # each dataset onto the symbolic inputs before compiling.
    train_u, train_t = train_set(idx)
    u.tag.shape = copy.copy(train_u.tag.shape)
    t.tag.shape = copy.copy(train_t.tag.shape)
    train = theano.function([u, t, lr, alpha], [out_err, r, norm_term],
                            updates=updates,
                            mode=mode,
                            givens={h0: train_h0})
    valid_u, valid_t = valid_set(idx)
    u.tag.shape = copy.copy(valid_u.tag.shape)
    t.tag.shape = copy.copy(valid_t.tag.shape)
    valid = theano.function([u, t], [out_err, r, norm_term],
                            mode=mode,
                            givens={h0: valid_h0})
    test_u, test_t = test_set(idx)
    u.tag.shape = copy.copy(test_u.tag.shape)
    t.tag.shape = copy.copy(test_t.tag.shape)
    # The test function also returns weights, gradients and activations so
    # they can be stored for later inspection.
    test = theano.function([u, t], [
        out_err, r, norm_term, W_hh, W_ux, W_hy, b, z, y, h, u, gu, t,
        b_Whh, b_Wux, b_Why, b_b
    ],
                           mode=mode,
                           givens={h0: test_h0})

    if o['wout_pinv']:
        # Closed-form (regularized Wiener-Hopf) solve for the output weights.
        wout_u, wout_t = wout_set.get_whole_tensors()

        def wiener_hopf_fn(u_t, t_t, H_tm1, Y_tm1, W_hh, W_ux, b, h0):
            # Accumulate H = sum h h^T and Y = sum h t^T over batches.
            def recurrent_fn(u_t, h_tm1, W_hh, W_ux, b):
                x_t = TT.dot(W_ux, u_t)
                h_t = TT.tanh(TT.dot(W_hh, h_tm1) + x_t + b)
                return h_t

            h_t, _ = theano.scan(recurrent_fn,
                                 sequences=u_t,
                                 outputs_info=h0,
                                 non_sequences=[W_hh, W_ux, b],
                                 name='recurrent_fn',
                                 mode=mode)
            H_t = H_tm1 + TT.dot(h_t[-1], h_t[-1].T)
            Y_t = Y_tm1 + TT.dot(h_t[-1], t_t.T)
            return H_t, Y_t

        H_0 = shared_shape(numpy.zeros((o['nhid'], o['nhid']),
                                       dtype=theano.config.floatX),
                           name='H0')
        Y_0 = shared_shape(numpy.zeros((o['nhid'], o['n_outs']),
                                       dtype=theano.config.floatX),
                           name='Y0')
        all_u = TT.tensor4('whole_u')
        all_t = TT.tensor3('whole_t')
        [H, Y], _ = theano.scan(wiener_hopf_fn,
                                sequences=[all_u, all_t],
                                outputs_info=[H_0, Y_0],
                                non_sequences=[W_hh, W_ux,
                                               TT.shape_padright(b), h0],
                                name='wiener_hopf_fn',
                                mode=mode)
        length = TT.cast(all_u.shape[0] * all_u.shape[3],
                         dtype=theano.config.floatX)
        H = H[-1] / length
        Y = Y[-1] / length
        # Ridge term keeps the linear solve well conditioned.
        H = H + floatX(o['wiener_lambda']) * TT.eye(o['nhid'])
        W_hy_solve = theano_linalg.solve(H, Y).T
        wout = theano.function([idx], [],
                               mode=mode,
                               updates={W_hy: W_hy_solve},
                               givens={
                                   all_u: wout_u,
                                   all_t: wout_t,
                                   h0: wout_h0
                               })
    '''
    theano.printing.pydotprint(train, 'train.png', high_contrast=True, with_ids= True)
    for idx,node in enumerate(train.maker.env.toposort()):
        if node.op.__class__.__name__ == 'Scan':
            theano.printing.pydotprint(node.op.fn, ('train%d_'%idx)+node.op.name, high_contrast = True, with_ids = True)
    theano.printing.pydotprint(train, 'valid.png', high_contrast=True, with_ids = True)
    for idx,node in enumerate(train.maker.env.toposort()):
        if node.op.__class__.__name__ == 'Scan':
            theano.printing.pydotprint(node.op.fn, ('valid%d_'%idx)+node.op.name, high_contrast = True, with_ids = True)
    theano.printing.pydotprint(train, 'test.png', high_contrast=True, with_ids = True)
    for idx,node in enumerate(train.maker.env.toposort()):
        if node.op.__class__.__name__ == 'Scan':
            theano.printing.pydotprint(node.op.fn, ('test%d_'%idx)+node.op.name, high_contrast = True, with_ids = True)
    if o['wout_pinv']:
        theano.printing.pydotprint(train, 'wout.png', high_contrast=True, with_ids = True)
        for idx,node in enumerate(train.maker.env.toposort()):
            if node.op.__class__.__name__ == 'Scan':
                theano.printing.pydotprint(node.op.fn, ('wout%d_'%idx)+node.op.name, high_contrast= True, with_ids= True)
    '''
    valid_set.refresh()
    #import GPUscan.ipdb; GPUscan.ipdb.set_trace()
    #rval = valid(valid_set.data_u[0],valid_set.data_t[0])

    #################### DEFINE THE MAIN LOOP #######################
    data = {}
    fix_len = o['max_storage_numpy']  #int(o['NN']/o['small_step'])
    # Rolling accumulators over a window of o['small_step'] epochs.
    avg_train_err = numpy.zeros((o['small_step'], o['n_outs']))
    avg_train_reg = numpy.zeros((o['small_step'], ))
    avg_train_norm = numpy.zeros((o['small_step'], ))
    avg_valid_err = numpy.zeros((o['small_step'], o['n_outs']))
    avg_valid_reg = numpy.zeros((o['small_step'], ))
    avg_valid_norm = numpy.zeros((o['small_step'], ))
    # Result buffers; -1 marks slots that were never written.
    data['options'] = o
    data['train_err'] = -1 * numpy.ones((fix_len, o['n_outs']))
    data['valid_err'] = -1 * numpy.ones((fix_len, o['n_outs']))
    data['train_reg'] = -1 * numpy.ones((fix_len, ))
    data['valid_reg'] = -1 * numpy.ones((fix_len, ))
    data['train_norm'] = numpy.zeros((fix_len, ))
    data['valid_norm'] = numpy.zeros((fix_len, ))
    data['test_err'] = [None] * o['max_storage']
    data['test_idx'] = [None] * o['max_storage']
    data['test_reg'] = [None] * o['max_storage']
    data['test_norm'] = [None] * o['max_storage']
    data['y'] = [None] * o['max_storage']
    data['z'] = [None] * o['max_storage']
    data['t'] = [None] * o['max_storage']
    data['h'] = [None] * o['max_storage']
    data['u'] = [None] * o['max_storage']
    data['gu'] = [None] * o['max_storage']
    data['W_hh'] = [None] * o['max_storage']
    data['W_ux'] = [None] * o['max_storage']
    data['W_hy'] = [None] * o['max_storage']
    data['b'] = [None] * o['max_storage']
    data['b_ux'] = [None] * o['max_storage']
    data['b_hy'] = [None] * o['max_storage']
    data['b_hh'] = [None] * o['max_storage']
    data['b_b'] = [None] * o['max_storage']
    storage_exceeded = False
    stop = False  # NOTE(review): never read afterwards
    old_rval = numpy.inf  # best validation error seen so far
    patience = o['patience']
    n_train = o['task_train_batches']
    n_valid = o['task_valid_batches']
    n_test = o['task_test_batches']
    n_test_runs = 0
    test_pos = 0
    valid_set.refresh()
    test_set.refresh()
    kdx = 0
    lr_v = floatX(o['lr'])
    alpha_v = floatX(o['alpha'])
    # Learning-rate decay / alpha warm-up schedules.
    lr_f = 1
    if o['lr_scheme']:
        lr_f = o['lr_scheme'][1] / (o['NN'] - o['lr_scheme'][0])
    alpha_r = 1
    if o['alpha_scheme']:
        alpha_r = float(o['alpha_scheme'][1] - o['alpha_scheme'][0])
    st = time.time()
    if channel:
        try:
            channel.save()
        except:
            pass
    for idx in xrange(int(o['NN'])):
        # Scheduled learning rate: 1/t decay after lr_scheme[0] epochs.
        if o['lr_scheme'] and idx > o['lr_scheme'][0]:
            lr_v = floatX(o['lr'] * 1. /
                          (1. + (idx - o['lr_scheme'][0]) * lr_f))
        # Scheduled alpha: gaussian bump between the two scheme points.
        if o['alpha_scheme']:
            if idx < o['alpha_scheme'][0]:
                alpha_v = floatX(0)
            elif idx < o['alpha_scheme'][1]:
                pos = 2. * (idx - o['alpha_scheme'][0]) / alpha_r - 1.
                alpha_v = floatX(numpy.exp(-pos**2 / 0.2) * o['alpha'])
            else:
                alpha_v = floatX(0)
        jdx = idx % o['small_step']
        avg_train_err[jdx, :] = 0
        avg_train_reg[jdx] = 0
        avg_train_norm[jdx] = 0
        avg_valid_err[jdx, :] = 0
        avg_valid_reg[jdx] = 0
        avg_valid_norm[jdx] = 0
        if o['wout_pinv'] and (idx % o['test_step'] == 0):
            # Periodically re-solve the output weights in closed form.
            wout_set.refresh()
            print('* Re-computing W_hy using closed-form '
                  'regularized wiener hopf formula')
            st_wout = time.time()
            wout(0)
            ed_wout = time.time()
            print '** It took ', ed_wout - st_wout, 'secs'
            print '** Average weight', abs(
                W_hy.get_value(borrow=True)).mean()
        print '*Re-generate training set '
        st_gen = time.time()
        train_set.refresh()
        print '**Generation took', time.time() - st_gen, 'secs'
        # One training epoch over all batches.
        for k in xrange(o['task_train_batches']):
            rval = train(train_set.data_u[k], train_set.data_t[k], lr_v,
                         alpha_v)
            print '[', idx, '/', patience, '][', k, '/', n_train, '][train]', \
                rval[0].mean(), rval[1], rval[2], (1. / rval[2]) * lr_v, alpha_v
            avg_train_err[jdx, :] += rval[0]
            avg_train_reg[jdx] += rval[1]
            avg_train_norm[jdx] += rval[2]
        train_set.clean()
        print '**Epoch took', time.time() - st, 'secs'
        avg_train_err[jdx] /= n_train
        avg_train_reg[jdx] /= n_train
        avg_train_norm[jdx] /= n_train
        st = time.time()
        # Validation pass.
        for k in xrange(n_valid):
            rval = valid(valid_set.data_u[k], valid_set.data_t[k])
            print '[', idx, '/', patience, '][', k, '/', n_valid, '][valid]', \
                rval[0].mean(), rval[1], rval[2]
            avg_valid_err[jdx] += rval[0]
            avg_valid_reg[jdx] += rval[1]
            avg_valid_norm[jdx] += rval[2]
        avg_valid_err[jdx] /= n_valid
        avg_valid_reg[jdx] /= n_valid
        avg_valid_norm[jdx] /= n_valid
        # Every small_step epochs: record averages, run a test pass and
        # pickle a backup of everything gathered so far.
        if idx >= o['small_step'] and idx % o['small_step'] == 0:
            kdx += 1
            if kdx >= o['max_storage_numpy']:
                # Wrap around once the numpy buffers fill up.
                kdx = o['max_storage_numpy'] // 3
                storage_exceeded = True
            data['steps'] = idx
            data['kdx'] = kdx
            data['storage_exceeded'] = storage_exceeded
            data['train_err'][kdx] = avg_train_err.mean()
            data['valid_err'][kdx] = avg_valid_err.mean()
            data['train_reg'][kdx] = avg_train_reg.mean()
            data['valid_reg'][kdx] = avg_valid_reg.mean()
            data['train_norm'][kdx] = avg_train_norm.mean()
            data['valid_norm'][kdx] = avg_valid_norm.mean()
            if channel:
                try:
                    _options['trainerr'] = data['train_err'][kdx].mean()
                    _options['maxtrainerr'] = data['train_err'][kdx].max()
                    _options['trainreg'] = data['train_reg'][kdx]
                    _options['trainnorm'] = data['train_norm'][kdx]
                    _options['validerr'] = data['valid_err'][kdx].mean()
                    _options['maxvaliderr'] = data['valid_err'][kdx].max()
                    _options['validreg'] = data['valid_reg'][kdx]
                    _options['validnorm'] = data['valid_norm'][kdx]
                    _options['steps'] = idx
                    _options['patience'] = patience
                    channel.save()
                except:
                    pass
            test_err = []
            test_reg = []
            test_norm = []
            for k in xrange(n_test):
                rval = test(test_set.data_u[k], test_set.data_t[k])
                print '[', idx, '][', k, '/', n_test, '][test]', \
                    rval[0].mean(), rval[1], rval[2]
                test_err += [rval[0]]
                test_reg += [rval[1]]
                test_norm += [rval[2]]
            # Keep only the first 10 columns of the bulky tensors (indices
            # follow the output list of the compiled `test` function).
            test_z = rval[7][:, :, :10]
            test_y = rval[8][:, :, :10]
            test_h = rval[9][:, :, :10]
            test_u = rval[10][:, :, :10]
            test_gu = rval[11][:, :, :10]
            test_t = rval[12][:, :10]
            data['test_idx'][test_pos] = idx
            data['test_pos'] = test_pos
            data['y'][test_pos] = test_y
            data['z'][test_pos] = test_z
            data['t'][test_pos] = test_t
            data['h'][test_pos] = test_h
            data['u'][test_pos] = test_u
            data['gu'][test_pos] = test_gu
            data['test_err'][test_pos] = test_err
            data['test_reg'][test_pos] = test_reg
            data['test_norm'][test_pos] = test_norm
            data['W_hh'][test_pos] = rval[3]
            data['W_ux'][test_pos] = rval[4]
            data['W_hy'][test_pos] = rval[5]
            data['b'][test_pos] = rval[6]
            data['b_hh'][test_pos] = rval[13]
            data['b_ux'][test_pos] = rval[14]
            data['b_hy'][test_pos] = rval[15]
            data['b_b'][test_pos] = rval[16]
            cPickle.dump(
                data,
                open(
                    os.path.join(configs.results_folder(), o['path'],
                                 '%s_backup.pkl' % o['name']), 'wb'))
        print '** ', avg_valid_err[jdx].mean(), ' < ', old_rval, ' ? '
        # Early-stopping bookkeeping: on improvement, extend patience; on
        # sufficient improvement, run and store a full test snapshot.
        if avg_valid_err[jdx].mean() < old_rval:
            patience += o['patience_incr']
            if avg_valid_err[jdx].mean() < old_rval * 0.997:
                test_err = []
                test_reg = []
                test_norm = []
                for k in xrange(n_test):
                    rval = test(test_set.data_u[k], test_set.data_t[k])
                    print '[', idx, '][', k, '/', n_test, '][test]', \
                        rval[0].mean(), rval[1], rval[2]
                    test_err += [rval[0]]
                    test_reg += [rval[1]]
                    test_norm += [rval[2]]
                test_z = rval[7][:, :, :10]
                test_y = rval[8][:, :, :10]
                test_h = rval[9][:, :, :10]
                test_u = rval[10][:, :, :10]
                test_gu = rval[11][:, :, :10]
                test_t = rval[12][:, :10]
                data['test_idx'][test_pos] = idx
                data['test_pos'] = test_pos
                data['y'][test_pos] = test_y
                data['z'][test_pos] = test_z
                data['t'][test_pos] = test_t
                data['h'][test_pos] = test_h
                data['u'][test_pos] = test_u
                data['gu'][test_pos] = test_gu
                data['test_err'][test_pos] = test_err
                data['test_reg'][test_pos] = test_reg
                data['test_norm'][test_pos] = test_norm
                data['W_hh'][test_pos] = rval[3]
                data['W_ux'][test_pos] = rval[4]
                data['W_hy'][test_pos] = rval[5]
                data['b'][test_pos] = rval[6]
                data['b_hh'][test_pos] = rval[13]
                data['b_ux'][test_pos] = rval[14]
                data['b_hy'][test_pos] = rval[15]
                data['b_b'][test_pos] = rval[16]
                cPickle.dump(
                    data,
                    open(
                        os.path.join(configs.results_folder(), o['path'],
                                     '%s.pkl' % o['name']), 'wb'))
                n_test_runs += 1
                test_pos += 1
                if test_pos >= o['max_storage']:
                    test_pos = test_pos - o['go_back']
                # Stop early once the test error is essentially zero.
                if numpy.mean(test_err) < 5e-5:
                    patience = idx - 5
                    break
            old_rval = avg_valid_err[jdx].mean()
        if idx > patience:
            break
def test_WEIRD_STUFF():
    """Compare two ExampleRNN instances that differ only in WEIRD_STUFF.

    Trains both for a few SGD steps on the same lagged data and asserts
    that their initial states and final weights agree to 1e-8; the bulk of
    the body is debug printing of the compiled graphs' topological order
    and inputs, kept to diagnose which compilation modes diverge.
    """
    n_vis = 3
    rng = N.random.RandomState(unittest_tools.fetch_seed(7722342))
    x = rng.randn(10, n_vis)
    y = rng.randn(10, n_vis)
    #set y to be like x with a lag of LAG
    LAG = 4
    y[LAG:] = x[:-LAG, 0:n_vis]

    # Same step size; only the WEIRD_STUFF flag differs between the two.
    minimizer_fn1 = sgd_minimizer(stepsize=0.001, WEIRD_STUFF=False)
    minimizer_fn2 = sgd_minimizer(stepsize=0.001, WEIRD_STUFF=True)
    rnn_module1 = ExampleRNN(n_vis, minimizer_fn1)
    rnn_module2 = ExampleRNN(n_vis, minimizer_fn2)

    rnn1 = rnn_module1.make(mode='FAST_RUN')
    # Alternative modes kept from debugging; the inline work/fail notes
    # record which compilation modes reproduced the discrepancy.
    # rnn2 = rnn_module1.make(mode='FAST_COMPILE')#work
    # rnn2 = rnn_module1.make(mode='FAST_RUN')#fail
    rnn2 = rnn_module2.make(mode=Mode('c|py', 'fast_run'))  #fail
    # rnn2 = rnn_module1.make(mode=Mode('c|py', 'fast_run').excluding("inplace"))#work
    # rnn2 = rnn_module1.make(mode=Mode('c|py', 'fast_compile'))#work
    # rnn2 = rnn_module1.make(mode=Mode('py', 'fast_run_stable'))#work
    # rnn2 = rnn_module1.make(mode=Mode('py', 'merge'))#work
    # rnn2 = rnn_module1.make(mode=Mode('c|py', 'fast_run').excluding("inplace_opt"))#work
    # rnn2 = rnn_module1.make(mode=Mode('py', 'fast_run'))#fail

    # Dump the optimizer pipeline of the failing mode.
    m = Mode('py', 'fast_run')
    for n in m.optimizer:
        print n.name

    if 0:
        # Side-by-side toposort of the two step_cost graphs (disabled).
        topo1 = rnn1.minimizer.step_cost.maker.env.toposort()
        topo2 = rnn2.minimizer.step_cost.maker.env.toposort()
        for i in range(len(topo1)):
            print '1', i, topo1[i]
            print '2', i, topo2[i]
    if 1:
        # Side-by-side toposort of the two step graphs.
        topo1 = rnn1.minimizer.step.maker.env.toposort()
        topo2 = rnn2.minimizer.step.maker.env.toposort()
        for i in range(len(topo1)):
            print '1', i, topo1[i]
            print '2', i, topo2[i]
    import theano.printing
    print len(rnn1.minimizer.step.maker.inputs)
    print len(rnn2.minimizer.step.maker.inputs)
    print rnn1.minimizer.step.maker.inputs
    print rnn2.minimizer.step.maker.inputs
    # for i in range(1,len(rnn1.minimizer.step.maker.inputs)):
    #     print "valid update:",theano.printing.pp(rnn1.minimizer.step.maker.inputs[i].update),
    #     print rnn1.minimizer.step.maker.inputs[i].update.name
    #     print "other update",theano.printing.pp(rnn2.minimizer.step.maker.inputs[i].update),
    #     print rnn2.minimizer.step.maker.inputs[i].update.name
    # print dir(rnn1.minimizer.step.maker.inputs[5].update)
    # print dir(rnn2.minimizer.step.maker.inputs[5].update)

    # Run a few identical training steps on both instances.
    niter = 3
    for i in xrange(niter):
        print rnn1.minimizer.step_cost(x, y)
        print rnn2.minimizer.step_cost(x, y)

    # assert rnn1.n_vis != rnn2.n_vis or slef.n_hid != rnn2.n_hid or rnn1.n_out != rnn2.n_out
    # After identical training, both runs must land on the same parameters.
    assert (N.abs(rnn1.z0 - rnn2.z0) < 1e-8).all()
    print (N.abs(rnn1.w - rnn2.w) < 1e-8).all()
    print (N.abs(rnn1.w - rnn2.w))
    print rnn1.w
    print rnn2.w
    assert (N.abs(rnn1.w - rnn2.w) < 1e-8).all()