def build_theano_functions(self, bottom, top):
    # building Theano functions
    from caffe_helper.theano_util import init_theano
    init_theano()
    import theano as tn
    import theano.tensor as T
    p = np.float32(self.p_)
    axis = self.axis_
    if axis is None:
        axis = tuple(range(1, len(bottom[0].shape)))
    # blob to CudaNdArray
    # Forward pass
    Tensor = T.TensorType('float32', [False] * len(bottom[0].shape))
    s_x = Tensor('x')    # bottom data
    s_dz = Tensor('dz')  # top diff
    s_z = s_x * ((s_x ** p).sum(axis, keepdims=True) ** (np.float32(-1. / p)))
    # See http://goo.gl/wIVRsP for `tn.Out(x, borrow=True)`
    self.f_forward = tn.function([s_x], tn.Out(s_z, borrow=True))
    # Backward pass
    s_l = (s_dz * s_z).sum()
    s_grad = tn.grad(s_l, wrt=s_x)
    self.f_backward = tn.function([s_x, s_dz], tn.Out(s_grad, borrow=True))
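# For reference, a plain-numpy sketch of the forward expression built
# symbolically above (illustrative only, not part of the original layer;
# assumes a 2-D blob normalized over axis=1 with p=2):
#     z = x * (sum(x**p over axis)) ** (-1/p)
import numpy as np

x = np.random.rand(4, 3).astype(np.float32)
p = np.float32(2.0)
z = x * (x ** p).sum(axis=1, keepdims=True) ** np.float32(-1.0 / p)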
def test_bug_2009_07_17_borrowed_output():
    # Regression test for a bug where output was borrowed by mistake.
    a = theano.tensor.dmatrix()
    b = theano.tensor.dmatrix()
    # The output should *NOT* be borrowed.
    g = theano.function([a, b],
                        theano.Out(theano.tensor.dot(a, b), borrow=False))
    x = np.zeros((1, 2))
    y = np.ones((2, 5))
    z = g(x, y)
    print(z)         # Should be zero.
    x.fill(1)
    print(g(x, y))   # Should be non-zero.
    print(z)         # Should still be zero.
    assert np.linalg.norm(z) == 0

    # The code above was supposed to fail when it was written (or, more
    # accurately, on the next revision, i.e. when it was merged with the
    # rest of the code, i.e. on revision cac9c9e9f08e).
    # However, for some reason, it does not fail anymore when at this
    # revision.
    # Thus, a new test (below) was added that exhibits the same issue. Note
    # that it may better be moved into the test_nnet.py test file if it turns
    # out the bug was caused by 'crossentropy_softmax_argmax_1hot_with_bias',
    # and was not a more general issue.
    test_output_activation_no_bias = theano.tensor.dmatrix()
    test_b2 = theano.tensor.dvector()
    test_target = theano.tensor.ivector()
    nll_softmax_argmax = (
        crossentropy_softmax_argmax_1hot_with_bias(
            test_output_activation_no_bias,
            test_b2,
            test_target))
    output = nll_softmax_argmax[1]
    g = theano.function([test_output_activation_no_bias, test_b2, test_target],
                        theano.Out(output, borrow=False))

    a = np.zeros((1, 5))
    b = np.ones(5)
    c = np.zeros(1, dtype=np.int32)

    z = g(a, b, c)
    z_backup = copy.copy(z)
    id_z = id(z)
    print(('Output z after first call: %s' % (z, )))
    a[0, 0] = 1
    id_other = id(g(a, b, c))
    print(('Output z after second call: %s' % (z, )))
    # Ensure that calling the function again returns a pointer towards a new
    # array.
    assert id_z != id_other
    # Just to be 100% sure, ensure that z was not altered.
    assert (z == z_backup).all()
def test_aliasing_3(self):
    import theano, theano.tensor

    x = theano.tensor.matrix()
    y = 2 * x
    f = theano.function([theano.In(x, borrow=True)],
                        theano.Out(y, borrow=True))
def test_dnn_conv_desc_merge():
    if not dnn.dnn_available(test_ctx_name):
        raise SkipTest(dnn.dnn_available.msg)
    kern_shp = T.as_tensor_variable(
        numpy.asarray([3, 1, 2, 2]).astype('int64'))
    desc1 = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(2, 2),
                               conv_mode='conv')(kern_shp)
    desc2 = dnn.GpuDnnConvDesc(border_mode='full', subsample=(1, 1),
                               conv_mode='cross')(kern_shp)
    # CDataType is not DeepCopyable so this will crash if we don't use
    # borrow=True
    f = theano.function([], [theano.Out(desc1, borrow=True),
                             theano.Out(desc2, borrow=True)])

    d1, d2 = f()

    # This will be the case if they are merged, which would be bad.
    assert d1 != d2
def cpu_expr_to_gpu(expr, unsafe=False):
    """Given a CPU expr return the same expression for the GPU.

    If unsafe is set to True, subsequent function calls evaluating the
    expression might return arrays pointing at the same memory region.
    """
    expr = T.cast(expr, 'float32')
    return theano.Out(theano.sandbox.cuda.basic_ops.gpu_from_host(expr),
                      borrow=unsafe)
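# The `unsafe`/`borrow` trade-off above can be illustrated on the CPU too.
# This is a minimal sketch, assuming only that Theano is installed (none of
# these names come from the surrounding sources): with borrow=True the array
# returned by one call may be reused as the output buffer of the next call,
# so it must be copied if its value is needed after the function is called
# again. Whether aliasing actually occurs depends on the op and linker.
import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
f = theano.function([x], theano.Out(2 * x, borrow=True))

a = f(np.ones((2, 2), dtype=theano.config.floatX))
safe = a.copy()  # keep a private copy before calling f() again
b = f(np.zeros((2, 2), dtype=theano.config.floatX))
# `a` may now alias Theano's internal buffer; `safe` still holds the
# result of the first call.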
def test_dnn_conv_desc_merge():
    if not cuda.dnn.dnn_available():
        raise SkipTest(cuda.dnn.dnn_available.msg)
    img_shp = T.as_tensor_variable(
        numpy.asarray([2, 1, 8, 8]).astype('int64'))
    kern_shp = T.as_tensor_variable(
        numpy.asarray([3, 1, 2, 2]).astype('int64'))
    desc1 = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(2, 2),
                               conv_mode='conv')(img_shp, kern_shp)
    desc2 = dnn.GpuDnnConvDesc(border_mode='full', subsample=(1, 1),
                               conv_mode='cross')(img_shp, kern_shp)
    # CDataType is not DeepCopyable so this will crash if we don't use
    # borrow=True
    f = theano.function(
        [], [theano.Out(desc1, borrow=True),
             theano.Out(desc2, borrow=True)],
        mode=mode_with_gpu)

    d1, d2 = f()

    # This will be the case if they are merged, which would be bad.
    assert d1 != d2

    desc1v2 = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(2, 2),
                                 conv_mode='conv')(img_shp, kern_shp)
    f = theano.function(
        [], [theano.Out(desc1, borrow=True),
             theano.Out(desc1v2, borrow=True)],
        mode=mode_with_gpu)
    assert len([n for n in f.maker.fgraph.apply_nodes
                if isinstance(n.op, dnn.GpuDnnConvDesc)]) == 1

    # CDATA values don't compare equal even if they represent the same
    # object, so we can't use DebugMode with them.
    if theano.config.mode not in ["DebugMode", "DEBUG_MODE"]:
        d1, d2 = f()
        # They won't be equal if they aren't merged.
        assert d1 == d2
def pairwise_theano_tensor_prepare(dtype):
    X = TT.matrix(dtype=str(dtype))
    dists = TT.sqrt(TT.sum(TT.sqr(X[:, None, :] - X), axis=2))
    name = 'pairwise_theano_broadcast_' + dtype
    rval = theano.function([X],
                           theano.Out(dists, borrow=True),
                           allow_input_downcast=True,
                           name=name)
    rval.__name__ = name
    return rval
def t_binomial(mean, size, const_size, var_input, input, steps, rtol):
    R = MRG_RandomStreams(234, use_cuda=False)
    u = R.binomial(size=size, p=mean)
    f = theano.function(var_input, u, mode=mode)
    out = f(*input)

    # Increase the number of steps if sizes implies only a few samples
    if numpy.prod(const_size) < 10:
        steps_ = steps * 100
    else:
        steps_ = steps
    basictest(f, steps_, const_size, prefix='mrg cpu',
              inputs=input, allow_01=True,
              target_avg=mean, mean_rtol=rtol)

    if mode != 'FAST_COMPILE' and cuda_available:
        R = MRG_RandomStreams(234, use_cuda=True)
        u = R.binomial(size=size, p=mean, dtype='float32')
        # well, it's really that this test w GPU doesn't make sense otw
        assert u.dtype == 'float32'
        f = theano.function(
            var_input,
            theano.Out(theano.sandbox.cuda.basic_ops.gpu_from_host(u),
                       borrow=True),
            mode=mode_with_gpu)
        gpu_out = numpy.asarray(f(*input))
        basictest(f, steps_, const_size, prefix='mrg gpu',
                  inputs=input, allow_01=True,
                  target_avg=mean, mean_rtol=rtol)
        numpy.testing.assert_array_almost_equal(out, gpu_out, decimal=6)

    RR = theano.tensor.shared_randomstreams.RandomStreams(234)
    uu = RR.binomial(size=size, p=mean)
    ff = theano.function(var_input, uu, mode=mode)
    # It's not our problem if numpy generates 0 or 1
    basictest(ff, steps_, const_size, prefix='numpy',
              allow_01=True, inputs=input,
              target_avg=mean, mean_rtol=rtol)
def pairwise_theano_blas_prepare(dtype):
    X = TT.matrix(dtype=str(dtype))
    X_norm_2 = (X ** 2).sum(axis=1)
    dists = TT.sqrt(2 * X_norm_2 - TT.dot(X, X.T))
    name = 'pairwise_theano_blas_' + dtype
    rval = theano.function([X],
                           theano.Out(dists, borrow=True),
                           allow_input_downcast=True,
                           name=name)
    rval.__name__ = name
    return rval
def test_loading_and_saving_1(self):
    import cPickle
    import theano, theano.tensor

    x = theano.tensor.matrix()
    y = 2 * x
    my_obj = theano.function([theano.In(x, borrow=True)],
                             theano.Out(y, borrow=True))

    mode_instance = theano.compile.mode.get_mode(None)
    if not isinstance(mode_instance, theano.compile.debugmode.DebugMode):
        # Here, we work in a temporary directory in order not to clutter
        # the Theano repository. Code relative to creating that dir and
        # removing it afterwards should _not_ be backported to the tutorial.
        from tempfile import mkdtemp
        origdir = os.getcwd()
        tmpdir = None
        try:
            tmpdir = mkdtemp()
            os.chdir(tmpdir)

            f = open('obj.save', 'wb')
            cPickle.dump(my_obj, f, protocol=cPickle.HIGHEST_PROTOCOL)
            f.close()

            f = open('obj.save', 'rb')
            loaded_obj = cPickle.load(f)
            f.close()

            obj1 = my_obj
            obj2 = my_obj
            obj3 = my_obj

            f = open('objects.save', 'wb')
            for obj in [obj1, obj2, obj3]:
                cPickle.dump(obj, f, protocol=cPickle.HIGHEST_PROTOCOL)
            f.close()

            f = open('objects.save', 'rb')
            loaded_objects = []
            for i in range(3):
                loaded_objects.append(cPickle.load(f))
            f.close()
        finally:
            # Get back to the original dir, and delete the temporary one.
            os.chdir(origdir)
            if tmpdir is not None:
                shutil.rmtree(tmpdir)
def MFmanual(V, T, r, l, gamma, iterations, P=None, Q=None, H=None):
    """
    Parameters:
        V : As many rows as documents
        T : As many rows as documents
    """
    V = V.T
    T = T.T
    rng = np.random
    n = np.size(V, 1)
    td = np.size(T, 0)
    vd = np.size(V, 0)
    if P is None:
        P = rng.random((vd, r)).astype(theano.config.floatX)
    if Q is None:
        Q = rng.random((td, r)).astype(theano.config.floatX)
    if H is None:
        H = rng.random((r, n)).astype(theano.config.floatX)

    tV = theano.shared(V.astype(theano.config.floatX), name="V")
    tT = theano.shared(T.astype(theano.config.floatX), name="T")
    tH = theano.shared(H, name="H")
    tQ = theano.shared(Q, name="Q")
    tP = theano.shared(P, name="P")
    tLambda = Th.scalar(name="l")
    tGamma = Th.scalar(name="gamma")

    tEV = (1.0 / 2.0) * ((tV - Th.dot(tP, tH)) ** 2).sum()
    tET = (1.0 / 2.0) * ((tT - Th.dot(tQ, tH)) ** 2).sum()
    tReg = (1.0 / 2.0) * tLambda * (((tP ** 2).sum()) + ((tQ ** 2).sum()) +
                                    ((tH ** 2).sum()))
    tCost = tEV + tET + tReg

    gP = -1.0 * (Th.dot(tV, tH.T) - Th.dot(tP, Th.dot(tH, tH.T)) - tLambda * tP)
    gQ = -1.0 * (Th.dot(tT, tH.T) - Th.dot(tQ, Th.dot(tH, tH.T)) - tLambda * tQ)
    gH = -1.0 * (Th.dot(tP.T, tV) - Th.dot(tP.T, Th.dot(tP, tH)) +
                 Th.dot(tQ.T, tT) - Th.dot(tQ.T, Th.dot(tQ, tH)) - tLambda * tH)

    train = theano.function(
        inputs=[tGamma, tLambda],
        outputs=[theano.Out(tCost, borrow=True)],
        updates={tP: tP - tGamma * gP,
                 tQ: tQ - tGamma * gQ,
                 tH: tH - tGamma * gH},
        name="train")

    for i in range(0, iterations):
        print train(np.asarray(gamma, dtype=theano.config.floatX),
                    np.asarray(l, dtype=theano.config.floatX))

    return tP.get_value(), tQ.get_value(), tH.get_value()
def setup(self, bottom, top):
    self.reshape(bottom, top)
    from caffe_helper.theano_util import init_theano
    init_theano()
    import theano as tn
    import theano.tensor as T
    shape1 = bottom[0].shape  # prediction
    shape2 = bottom[1].shape  # label
    s_p = T.TensorType('float32', [False] * len(shape1))('p')
    s_t = T.TensorType('float32', [False] * len(shape2))('t')
    # Forward pass
    FLTMIN = np.finfo(np.float32).tiny
    s_l = -T.mean(
        T.log(T.maximum(FLTMIN, s_p.flatten(2)))[T.arange(s_t.shape[0]),
                                                 T.cast(s_t, 'int32')])
    self.f_forward = tn.function([s_p, s_t], tn.Out(s_l, borrow=True))
    # Backward pass
    s_dz = T.fscalar('dz')
    sg_p = tn.grad(s_dz * s_l, wrt=s_p)
    self.f_backward = tn.function([s_p, s_t, s_dz], tn.Out(sg_p, borrow=True))
def test_csc_correct_output_faster_than_scipy(self):
    sparse_dtype = 'float64'
    dense_dtype = 'float64'

    a = SparseType('csc', dtype=sparse_dtype)()
    b = tensor.matrix(dtype=dense_dtype)
    d = theano.dot(a, b)
    f = theano.function([a, b], theano.Out(d, borrow=True))

    for M, N, K, nnz in [(4, 3, 2, 3),
                         (40, 30, 20, 3),
                         (40, 30, 20, 30),
                         (400, 3000, 200, 6000),
                         ]:
        spmat = sp.csc_matrix(random_lil((M, N), sparse_dtype, nnz))
        mat = numpy.asarray(numpy.random.randn(N, K), dense_dtype)
        theano_times = []
        scipy_times = []
        for i in xrange(5):
            t0 = time.time()
            theano_result = f(spmat, mat)
            t1 = time.time()
            scipy_result = spmat * mat
            t2 = time.time()

            theano_times.append(t1 - t0)
            scipy_times.append(t2 - t1)

        theano_time = numpy.min(theano_times)
        scipy_time = numpy.min(scipy_times)

        speedup = scipy_time / theano_time
        print scipy_times
        print theano_times
        print('M=%(M)s N=%(N)s K=%(K)s nnz=%(nnz)s theano_time'
              '=%(theano_time)s speedup=%(speedup)s') % locals()

        # fail if Theano is slower than scipy by more than a certain amount
        overhead_tol = 0.003  # seconds overall
        overhead_rtol = 1.2   # times as long

        self.assertTrue(numpy.allclose(theano_result, scipy_result))
        if not theano.config.mode in ["DebugMode", "DEBUG_MODE"]:
            self.assertFalse(
                theano_time > overhead_rtol * scipy_time + overhead_tol)
def test_Gpujoin_inplace():
    """Test Gpujoin to work inplace.

    This function tests the case when several elements are passed to the
    Gpujoin function but all except one of them are empty. In this case
    Gpujoin should work inplace and the output should be the view of the
    non-empty element.
    """
    s = T.lscalar()
    data = numpy.array([3, 4, 5], dtype=theano.config.floatX)
    x = gpuarray_shared_constructor(data, borrow=True)
    z = T.zeros((s,))

    join = GpuJoin(view=0)
    c = join(0, x, z)

    f = theano.function([s], theano.Out(c, borrow=True))
    assert x.get_value(borrow=True, return_internal_type=True) is f(0)
    assert numpy.allclose(f(0), [3, 4, 5])
def test_Gpujoin_inplace():
    # Test Gpujoin to work inplace.
    #
    # This function tests the case when several elements are passed to the
    # Gpujoin function but all except one of them are empty. In this case
    # Gpujoin should work inplace and the output should be the view of the
    # non-empty element.
    s = tt.lscalar()
    data = np.array([3, 4, 5], dtype=theano.config.floatX)
    x = gpuarray_shared_constructor(data, borrow=True)
    z = tt.zeros((s,))

    join = GpuJoin(view=0)
    c = join(0, x, z)

    f = theano.function([s], theano.Out(c, borrow=True))
    if not isinstance(mode_with_gpu, theano.compile.DebugMode):
        assert x.get_value(borrow=True, return_internal_type=True) is f(0)
    assert np.allclose(f(0), [3, 4, 5])
def function(self, additional_tags=None, gpu=False):
    mode = theano.compile.mode.get_default_mode()
    inputs = self.inputs
    output = self.outputs[0]
    inputs = [Var.var for Var in inputs]
    output = output.var
    assert len(self.outputs) == 1, "Multiple output assumption fails"
    if gpu:
        inputs, cpu_inputs = zip(*map(cpu_var_to_gpu_var, inputs))
        output = self.op(*cpu_inputs)
        output = theano.sandbox.cuda.basic_ops.gpu_from_host(output)
        output = theano.Out(output, borrow=True)
    else:
        mode = mode.excluding('gpu')
    if additional_tags:
        mode = mode.including(additional_tags)

    return theano.function(inputs, output, mode=mode, name='test')
def __init__(self, tt_input, tt_output, updates=None, name='Unnamed Function',
             borrow_inp=False, borrow_out=False, profile_execution=False):
    self.name = name
    self.func = None
    self.profile = profile_execution
    self.last_exec_time = None
    self.updates = updates
    if borrow_inp:
        tt_input = [theano.In(x, borrow=True) for x in tt_input]

    self.tt_input = tt_input
    self.single_return = False
    if not isinstance(tt_output, (list, tuple)):
        tt_output = [tt_output, ]
        self.single_return = True
    if borrow_out:
        tt_output = [theano.Out(x, borrow=True) for x in tt_output]

    self.tt_output = tt_output
def learning_function(self):
    """
    Get the learning function.
    :param func:
    :return:
    """
    network_updates = list(self.network.updates) + list(self.network.training_updates)
    learning_updates = list(self._learning_updates())
    update_list = network_updates + learning_updates

    logging.info("network updates: %s" % " ".join(map(str, [x[0] for x in network_updates])))
    logging.info("learning updates: %s" % " ".join(map(str, [x[0] for x in learning_updates])))

    variables = self.network.input_variables + self.network.target_variables
    givens = None
    return theano.function(
        variables,
        map(lambda v: theano.Out(v, borrow=True), self.training_variables),
        updates=update_list, allow_input_downcast=True,
        mode=self.config.get("theano_mode", None),
        givens=givens)
def function(self, additional_tags=None, gpu=False):
    # inputs = [inp.clone() for inp in self.apply.inputs]
    # output = self.apply.op(inputs)
    # env = theano.FunctionGraph(inputs, [output])
    mode = theano.compile.mode.get_default_mode()
    inputs = self.apply.inputs
    output = self.apply.outputs[0]
    assert len(self.apply.outputs) == 1, "Multiple output assumption fails"
    if gpu:
        inputs, cpu_inputs = zip(*map(cpu_var_to_gpu_var, inputs))
        output = self.apply.op(*cpu_inputs)
        output = theano.sandbox.cuda.basic_ops.gpu_from_host(output)
        output = theano.Out(output, borrow=True)
    else:
        mode = mode.excluding('gpu')
    if additional_tags:
        mode = mode.including(additional_tags)

    return theano.function(inputs, output, mode=mode)
def test_dot_sparse_sparse(self):
    # test dot for 2 input sparse matrix
    sparse_dtype = 'float64'
    sp_mat = {'csc': sp.csc_matrix,
              'csr': sp.csr_matrix}

    for sparse_format_a in ['csc', 'csr']:
        for sparse_format_b in ['csc', 'csr']:
            a = SparseType(sparse_format_a, dtype=sparse_dtype)()
            b = SparseType(sparse_format_b, dtype=sparse_dtype)()
            d = theano.dot(a, b)
            f = theano.function([a, b], theano.Out(d, borrow=True))
            topo = f.maker.env.toposort()
            for M, N, K, nnz in [(4, 3, 2, 3),
                                 (40, 30, 20, 3),
                                 (40, 30, 20, 30),
                                 (400, 3000, 200, 6000),
                                 ]:
                a_val = sp_mat[sparse_format_a](
                    random_lil((M, N), sparse_dtype, nnz))
                b_val = sp_mat[sparse_format_b](
                    random_lil((N, K), sparse_dtype, nnz))
                f(a_val, b_val)
def __init__(self, network, config=None, method=None):
    if method:
        logging.info("changing optimization method to '%s'" % method)
        if not config:
            config = TrainerConfig()
        elif isinstance(config, dict):
            config = TrainerConfig(config)
        config.method = method

    super(GeneralNeuralTrainer, self).__init__(network, config)

    logging.info('compiling %s learning function', self.__class__.__name__)

    network_updates = list(network.updates) + list(network.training_updates)
    learning_updates = list(self.learning_updates())
    update_list = network_updates + learning_updates

    logging.info("network updates: %s" % " ".join(map(str, [x[0] for x in network_updates])))
    logging.info("learning updates: %s" % " ".join(map(str, [x[0] for x in learning_updates])))

    if False and config.data_transmitter:
        variables = [config.data_transmitter.get_iterator()]
        givens = config.data_transmitter.get_givens()
    else:
        variables = network.input_variables + network.target_variables
        givens = None

    self.learning_func = theano.function(
        variables,
        map(lambda v: theano.Out(v, borrow=True), self.training_variables),
        updates=update_list, allow_input_downcast=True,
        mode=self.config.get("theano_mode", None),
        givens=givens)
def test_binomial():
    # TODO: test size=None, ndim=X
    # TODO: test size=X, ndim!=X.ndim
    # TODO: test random seed in legal value(!=0 and other)
    # TODO: test sample_size not a multiple of guessed #streams
    # TODO: test size=Var, with shape that change from call to call
    # we test size in a tuple of int and a tensor.shape.
    # we test the param p with int.

    if (mode in ['DEBUG_MODE', 'DebugMode', 'FAST_COMPILE'] or
            mode == 'Mode' and config.linker in ['py']):
        sample_size = (10, 50)
        steps = 50
        rtol = 0.02
    else:
        sample_size = (500, 50)
        steps = int(1e3)
        rtol = 0.01

    x = tensor.matrix()
    v = tensor.vector()
    for mean in [0.1, 0.5]:
        for size, const_size, var_input, input in [
                (sample_size, sample_size, [], []),
                (x.shape, sample_size, [x],
                 [numpy.zeros(sample_size, dtype=config.floatX)]),
                ((x.shape[0], sample_size[1]), sample_size, [x],
                 [numpy.zeros(sample_size, dtype=config.floatX)]),
                # test empty size (scalar)
                ((), (), [], []),
                ]:

            # print ''
            # print 'ON CPU with size=(%s) and mean(%d):' % (str(size), mean)
            R = MRG_RandomStreams(234, use_cuda=False)
            # Note: we specify `nstreams` to avoid a warning.
            u = R.binomial(size=size, p=mean,
                           nstreams=rng_mrg.guess_n_streams(size, warn=False))
            f = theano.function(var_input, u, mode=mode)
            # theano.printing.debugprint(f)
            out = f(*input)
            # print 'random?[:10]\n', out[0, 0:10]
            # print 'random?[-1,-10:]\n', out[-1, -10:]

            # Increase the number of steps if sizes implies only a few samples
            if numpy.prod(const_size) < 10:
                steps_ = steps * 100
            else:
                steps_ = steps
            basictest(f, steps_, const_size, prefix='mrg cpu',
                      inputs=input, allow_01=True,
                      target_avg=mean, mean_rtol=rtol)

            if mode != 'FAST_COMPILE' and cuda_available:
                # print ''
                # print 'ON GPU with size=(%s) and mean(%d):' % (str(size), mean)
                R = MRG_RandomStreams(234, use_cuda=True)
                u = R.binomial(size=size, p=mean, dtype='float32',
                               nstreams=rng_mrg.guess_n_streams(size,
                                                                warn=False))
                # well, it's really that this test w GPU doesn't make sense otw
                assert u.dtype == 'float32'
                f = theano.function(
                    var_input,
                    theano.Out(theano.sandbox.cuda.basic_ops.gpu_from_host(u),
                               borrow=True),
                    mode=mode_with_gpu)
                # theano.printing.debugprint(f)
                gpu_out = numpy.asarray(f(*input))
                # print 'random?[:10]\n', gpu_out[0, 0:10]
                # print 'random?[-1,-10:]\n', gpu_out[-1, -10:]
                basictest(f, steps_, const_size, prefix='mrg gpu',
                          inputs=input, allow_01=True,
                          target_avg=mean, mean_rtol=rtol)
                numpy.testing.assert_array_almost_equal(out, gpu_out,
                                                        decimal=6)

            # print ''
            # print 'ON CPU w NUMPY with size=(%s) and mean(%d):' % (str(size),
            #                                                        mean)
            RR = theano.tensor.shared_randomstreams.RandomStreams(234)
            uu = RR.binomial(size=size, p=mean)
            ff = theano.function(var_input, uu, mode=mode)
            # It's not our problem if numpy generates 0 or 1
            basictest(ff, steps_, const_size, prefix='numpy',
                      allow_01=True, inputs=input,
                      target_avg=mean, mean_rtol=rtol)
def _o(s):
    return tn.Out(s, borrow=True)
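# Hypothetical usage sketch for the `_o` helper above (assuming `tn` is
# `theano` imported at module level, as in the layer code earlier in this
# listing); it simply shortens wrapping an output in tn.Out(..., borrow=True):
import numpy as np
import theano as tn
import theano.tensor as T

s_x = T.vector('x')
f = tn.function([s_x], _o(2 * s_x))  # same as tn.Out(2 * s_x, borrow=True)
print(f(np.arange(3, dtype=tn.config.floatX)))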
def train_model(self, lr_scheme, initial_learning_rate=0.01, min_lr=0.00001,
                learning_rate_decay=0.05, constant_steps=None, L1_reg=0.0000,
                L2_reg=0.0000, lr_global=False, n_epochs=100, momentum_term=0.9):
    logger.info("\n" + "\n".join(
        ["\t%s : " % key + str(locals()[key]) for key in
         ["lr_scheme", "lr_global", "min_lr", "initial_learning_rate",
          "learning_rate_decay", "L1_reg", "L2_reg", "n_epochs"]]))

    cost = self.model.negative_log_likelihood(self.y) \
        + L2_reg * self.model.L2 #\
#        + L1_reg * self.model.L1

    self.learning_rate = theano.shared(np.float32(initial_learning_rate))
    if constant_steps == None:
        self.constant_steps = np.inf
    else:
        self.constant_steps = constant_steps
    self.lr_scheme = lr_scheme

    def gen_updates_sgd():
        gparams = [theano.grad(cost, param) for param in self.model.params]
        updates = []
        for param_i, grad_i, n_in in zip(self.model.params, gparams, self.model.n_ins):
            if "embeddings" not in str(param_i):
                updates.append((param_i, param_i - self.learning_rate/n_in * grad_i))
            else:
                updates.append((param_i, param_i - self.learning_rate * grad_i))
        return updates

    def gen_updates_sgd_global():
        gparams = [theano.grad(cost, param) for param in self.model.params]
        updates = []
        for param_i, grad_i in zip(self.model.params, gparams):
            updates.append((param_i, param_i - self.learning_rate * grad_i))
        return updates

#    def gen_updates_regular_momentum(loss, all_parameters, learning_rate, momentum, weight_decay):
#        all_grads = [theano.grad(loss, param) for param in all_parameters]
#        updates = []
#        for param_i, grad_i in zip(all_parameters, all_grads):
#            mparam_i = theano.shared(param_i.get_value()*0.)
#            v = momentum * mparam_i - weight_decay * learning_rate * param_i - learning_rate * grad_i
#            updates.append((mparam_i, v))
#            updates.append((param_i, param_i + v))
#        return updates
#
#    def gen_updates_own_momentum():
#        agparams = [theano.shared(value=np.zeros(p.get_value().shape, dtype=theano.config.floatX), name='ag_'+p.name) \
#            for p in self.model.params]  # averaged gradients
#        gparams = []  # gradients
#        for pid, param in enumerate(self.model.params):
#            gparam = T.grad(cost, param)
#            gparams.append(gparam)
#        updates = []
#        for param, gparam, agparam, n_in in zip(self.model.params, gparams, agparams, self.model.n_ins):
#            updates.append((agparam, np.float32(1-momentum_term)*agparam + np.float32(momentum_term)*gparam))
#            if lr_global:
#                updates.append((param, param - self.learning_rate/n_in * (np.float32(1-momentum_term)*agparam + np.float32(momentum_term)*gparam)))
#            else:
#                updates.append((param, param - self.learning_rate * (np.float32(1-momentum_term)*agparam + np.float32(momentum_term)*gparam)))
#        return updates

    if lr_global:
        updates = gen_updates_sgd_global()
    else:
        updates = gen_updates_sgd()

    train_model = theano.function(
        inputs=[self.index, self.permutation],
        outputs=theano.Out(cost, borrow=True),
        updates=updates,
        givens={
            self.x: self.train_set_x[self.permutation[self.index * self.batch_size:(self.index + 1) * self.batch_size]],
            self.y: self.train_set_y[self.permutation[self.index * self.batch_size:(self.index + 1) * self.batch_size]]},
        name="train_model")

    #==========================================================================
    # train model
    #==========================================================================
    theano.printing.pydotprint(train_model)
    logger.info('... training')

    min_valid_cost = np.inf
    best_epoch = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    self.trainingscosts = []
    self.validationcosts = []
    training_costs = [10]

    while epoch <= n_epochs:
        self.trainingscosts.append(np.mean(training_costs))
        validation_costs = [self.validation_cost(i) for i in xrange(self.n_valid_batches)]
        self.validationcosts.append(np.mean(validation_costs))
        self.monitor_update()
        if self.validationcosts[-1] < min_valid_cost:
            min_valid_cost = self.validationcosts[-1]
            best_epoch = epoch
            self.test_error(epoch)
        if epoch % 25 == 0:
            pickle.dump(self.model,
                        open(os.path.join(self.modeldir, 'model%i.pck' % epoch), 'wb'),
                        protocol=pickle.HIGHEST_PROTOCOL)
            hidden_values = [self.visualize_hidden(i)
                             for i in np.random.randint(0, self.n_valid_batches, 30)]
            image = np.vstack(hidden_values)
            binary_image = (image > 0.999) | (image < -0.999)
            plt.imshow(binary_image, cmap=plt.cm.get_cmap('gray'), interpolation='nearest')
            plt.savefig(os.path.join(self.modeldir, 'binary_hidden%i.png' % epoch))
            plt.clf()
            test_predictions = [self.predictions(i) for i in xrange(self.n_test_batches)]
            np.save(os.path.join(self.modeldir, "predictions.npy"), test_predictions)
            generate_output(self.modeldir, modelnumber=epoch,
                            predictions=np.array(test_predictions))

        training_costs = []
        perm = np.random.permutation(self.train_set_size).astype(np.int32)
        for minibatch_index in xrange(self.n_train_batches):
            training_costs.append(train_model(minibatch_index, perm))

        if epoch > 0:
            if self.lr_scheme != "constant":
                if self.lr_scheme == "continuous" and epoch > self.constant_steps:
                    self.learning_rate.set_value(np.float32(
                        initial_learning_rate * (1 + learning_rate_decay * self.constant_steps) /
                        (1 + learning_rate_decay * max(epoch, self.constant_steps))))
                elif ((self.validationcosts[-1] - self.validationcosts[-2]) > 0 and
                        (self.validationcosts[-1] - np.min(self.validationcosts)) > 0.01 and
                        np.argmin(self.validationcosts) != (len(self.validationcosts) - 2)) or \
                        (((self.trainingscosts[-1] - self.trainingscosts[-2]) > 0) and
                         (np.argmin(self.trainingscosts) != (len(self.trainingscosts) - 2))):
                    if self.lr_scheme == "stepwise":
                        self.learning_rate.set_value(np.float32(self.learning_rate.get_value() / 3))
                    elif self.lr_scheme == "continuous":
                        self.constant_steps = epoch - 1
                        self.learning_rate.set_value(np.float32(
                            initial_learning_rate * (1 + learning_rate_decay * self.constant_steps) /
                            (1 + learning_rate_decay * max(epoch, self.constant_steps))))
                if self.learning_rate.get_value() < min_lr:
                    self.learning_rate.set_value(np.float32(min_lr))
                    self.lr_scheme = "constant"
        epoch = epoch + 1

    end_time = time.clock()
    logger.info(('Optimization complete. Best validation score of %f %% '
                 'obtained at epoch %i, with test performance %f %%') %
                (min_valid_cost, best_epoch, test_score * 100.))
    logger.info('The code for file ' + os.path.split(__file__)[1] +
                ' ran for %.2fm' % ((end_time - start_time) / 60.))
    self.monitor_update()

    test_predictions = [self.predictions(i) for i in xrange(self.n_test_batches)]
    generate_output(self.modeldir, predictions=np.array(test_predictions))
# updates_ada=lasagne.updates.adagrad(objective,get_all_params(l_out),learning_rate=.03)
update = theano.function([l_in.input_var, target], [loss],
                         updates=updates_other, allow_input_downcast=True)
update_hid = theano.function([l_in.input_var, target], [loss],
                             updates=updates_hid, allow_input_downcast=True)
# update_ada=theano.function([l_in.input_var,target],[loss],updates=updates_ada,allow_input_downcast=True)
update_scal = theano.function([l_in.input_var, target], [],
                              updates=updates_scal, allow_input_downcast=True)
check = theano.function([l_in.input_var, target], [loss_smoo],
                        allow_input_downcast=True)
predict = theano.function(
    [l_in.input_var],
    [theano.Out(get_output(l_out, deterministic=True), borrow=True)],
    allow_input_downcast=True)

for fno in xrange(12, 1000):
    reset()
    # diagn=theano.function([l_in.input_var,train_indices,valid_indices,target],[train_loss,valid_loss],allow_input_downcast=True,name="jill")
    filename = "a_fitted_nnet_t_" + str(fno) + ".nc"
    assert not os.path.exists(filename)
    f = netcdf.netcdf_file(filename, "w")
    f.createDimension("train", train_X.shape[0])
    f.createDimension("valid", valid_X.shape[0])
    f.createDimension("test", test_X.shape[0])
    f.createDimension("preds", 37)
    v_train = f.createVariable("train", np.float, ("train", "preds"))
    v_valid = f.createVariable("valid", np.float, ("valid", "preds"))
def test_uniform():
    # TODO: test param low, high
    # TODO: test size=None
    # TODO: test ndim!=size.ndim
    # TODO: test bad seed
    # TODO: test size=Var, with shape that change from call to call
    if (mode in ['DEBUG_MODE', 'DebugMode', 'FAST_COMPILE'] or
            mode == 'Mode' and config.linker in ['py']):
        sample_size = (10, 100)
        steps = 50
    else:
        sample_size = (500, 50)
        steps = int(1e3)

    x = tensor.matrix()
    for size, const_size, var_input, input in [
            (sample_size, sample_size, [], []),
            (x.shape, sample_size, [x],
             [numpy.zeros(sample_size, dtype=config.floatX)]),
            ((x.shape[0], sample_size[1]), sample_size, [x],
             [numpy.zeros(sample_size, dtype=config.floatX)]),
            # test empty size (scalar)
            ((), (), [], []),
            ]:

        #### TEST CPU IMPLEMENTATION ####
        # The python and C implementation are tested with DebugMode
        # print ''
        # print 'ON CPU with size=(%s):' % str(size)
        x = tensor.matrix()
        R = MRG_RandomStreams(234, use_cuda=False)
        # Note: we specify `nstreams` to avoid a warning.
        # TODO Look for all occurrences of `guess_n_streams` and `30 * 256`
        # for such situations: it would be better to instead filter the
        # warning using the warning module.
        u = R.uniform(size=size,
                      nstreams=rng_mrg.guess_n_streams(size, warn=False))
        f = theano.function(var_input, u, mode=mode)
        assert any([isinstance(node.op, theano.sandbox.rng_mrg.mrg_uniform)
                    for node in f.maker.fgraph.toposort()])
        # theano.printing.debugprint(f)
        cpu_out = f(*input)

        # print 'CPU: random?[:10], random?[-10:]'
        # print cpu_out[0, 0:10]
        # print cpu_out[-1, -10:]

        # Increase the number of steps if sizes implies only a few samples
        if numpy.prod(const_size) < 10:
            steps_ = steps * 100
        else:
            steps_ = steps
        basictest(f, steps_, const_size, prefix='mrg cpu', inputs=input)

        if mode != 'FAST_COMPILE' and cuda_available:
            # print ''
            # print 'ON GPU with size=(%s):' % str(size)
            R = MRG_RandomStreams(234, use_cuda=True)
            u = R.uniform(size=size, dtype='float32',
                          nstreams=rng_mrg.guess_n_streams(size, warn=False))
            # well, it's really that this test w GPU doesn't make sense otw
            assert u.dtype == 'float32'
            f = theano.function(
                var_input,
                theano.Out(theano.sandbox.cuda.basic_ops.gpu_from_host(u),
                           borrow=True),
                mode=mode_with_gpu)
            assert any([isinstance(node.op,
                                   theano.sandbox.rng_mrg.GPU_mrg_uniform)
                        for node in f.maker.fgraph.toposort()])
            # theano.printing.debugprint(f)
            gpu_out = numpy.asarray(f(*input))

            # print 'GPU: random?[:10], random?[-10:]'
            # print gpu_out[0, 0:10]
            # print gpu_out[-1, -10:]
            basictest(f, steps_, const_size, prefix='mrg gpu', inputs=input)
            numpy.testing.assert_array_almost_equal(cpu_out, gpu_out,
                                                    decimal=6)

        # print ''
        # print 'ON CPU w Numpy with size=(%s):' % str(size)
        RR = theano.tensor.shared_randomstreams.RandomStreams(234)
        uu = RR.uniform(size=size)
        ff = theano.function(var_input, uu, mode=mode)
        # It's not our problem if numpy generates 0 or 1
        basictest(ff, steps_, const_size, prefix='numpy', allow_01=True,
                  inputs=input)
def main():
    # Turn these knobs if you wish to work with larger/smaller data
    img_dims = (500, 500)
    fsize = 2
    n_channels = 3

    # Create a random image
    img = np.asarray(np.random.rand(*((n_channels, ) + img_dims)),
                     dtype=th.config.floatX)
    img = np.arange(n_channels * img_dims[0] * img_dims[1],
                    dtype=th.config.floatX).reshape(n_channels, *img_dims)

    # Adapt the code to use the CPU/GPU. In the GPU case, do NOT transfer the
    # results back to memory.
    wrap = ((lambda x: x) if th.config.device == "cpu" else
            (lambda x: th.Out(th.sandbox.cuda.basic_ops.gpu_from_host(x),
                              borrow=True)))

    # Convolution method
    x = th.shared(img)
    f = th.function(inputs=[],
                    outputs=wrap(im_to_col(x, fsize, n_channels=n_channels)),
                    name='im_to_col')

    # Time the convolution method
    tic = time.time()
    out_conv = f()
    conv_time = time.time() - tic
    print("Convolution-based method: {0}".format(conv_time))

    # Time the neighbors method
    neighs = N.NeighbourhoodsFromImages(1, (fsize, fsize), strides=(1, 1),
                                        ignore_border=True)(x)
    f = th.function([], outputs=wrap(neighs), name='old neighs')
    tic = time.time()
    out_old = f()
    neigh_time = time.time() - tic
    print("Neighbors-based method: {0}".format(neigh_time))

    # Time the new neighbours method ignore border
    neighs = N.images2neibs(x.dimshuffle('x', 0, 1, 2), (fsize, fsize), (1, 1),
                            mode='ignore_borders')
    f = th.function([], outputs=wrap(neighs), name='new neighs ignore border')
    tic = time.time()
    out_new = f()
    neigh_time = time.time() - tic
    print("New Neighbors-based ignore border method: {0}".format(neigh_time))

    # Time the new neighbours method
    neighs = N.images2neibs(x.dimshuffle('x', 0, 1, 2), (fsize, fsize), (1, 1),
                            mode='valid')
    f = th.function([], outputs=wrap(neighs), name='new neighs valid')
    tic = time.time()
    out_new = f()
    neigh_time = time.time() - tic
    print("New Neighbors-based valid method: {0}".format(neigh_time))

    # Print speedup results
    if conv_time < neigh_time:
        print("Conv faster than neigh. Speedup: {0}x".format(neigh_time / conv_time))
    else:
        print("Neigh faster than conv. Speedup: {0}x".format(conv_time / neigh_time))
def test_normal0():
    steps = 50
    std = 2.
    if (mode in ['DEBUG_MODE', 'DebugMode', 'FAST_COMPILE'] or
            mode == 'Mode' and config.linker in ['py']):
        sample_size = (25, 30)
        default_rtol = .02
    else:
        sample_size = (999, 50)
        default_rtol = .01
    sample_size_odd = (sample_size[0], sample_size[1] - 1)
    x = tensor.matrix()

    for size, const_size, var_input, input, avg, rtol, std_tol in [
            (sample_size, sample_size, [], [], -5., default_rtol,
             default_rtol),
            (x.shape, sample_size, [x],
             [numpy.zeros(sample_size, dtype=config.floatX)],
             -5., default_rtol, default_rtol),
            ((x.shape[0], sample_size[1]), sample_size, [x],
             [numpy.zeros(sample_size, dtype=config.floatX)],
             -5., default_rtol, default_rtol),
            # test odd value
            (sample_size_odd, sample_size_odd, [], [], -5., default_rtol,
             default_rtol),
            # test odd value
            (x.shape, sample_size_odd, [x],
             [numpy.zeros(sample_size_odd, dtype=config.floatX)],
             -5., default_rtol, default_rtol),
            (sample_size, sample_size, [], [],
             numpy.arange(numpy.prod(sample_size),
                          dtype='float32').reshape(sample_size),
             10. * std / numpy.sqrt(steps), default_rtol),
            # test empty size (scalar)
            ((), (), [], [], -5., default_rtol, 0.02),
            # test with few samples at the same time
            ((1, ), (1, ), [], [], -5., default_rtol, 0.02),
            ((2, ), (2, ), [], [], -5., default_rtol, 0.02),
            ((3, ), (3, ), [], [], -5., default_rtol, 0.02),
            ]:
        # print ''
        # print 'ON CPU:'

        R = MRG_RandomStreams(234, use_cuda=False)
        # Note: we specify `nstreams` to avoid a warning.
        n = R.normal(size=size, avg=avg, std=std,
                     nstreams=rng_mrg.guess_n_streams(size, warn=False))
        f = theano.function(var_input, n, mode=mode)
        # theano.printing.debugprint(f)
        out = f(*input)
        # print 'random?[:10]\n', out[0, 0:10]

        # Increase the number of steps if size implies only a few samples
        if numpy.prod(const_size) < 10:
            steps_ = steps * 50
        else:
            steps_ = steps
        basictest(f, steps_, const_size, target_avg=avg, target_std=std,
                  prefix='mrg ', allow_01=True, inputs=input,
                  mean_rtol=rtol, std_tol=std_tol)

        sys.stdout.flush()

        if mode != 'FAST_COMPILE' and cuda_available:
            # print ''
            # print 'ON GPU:'
            R = MRG_RandomStreams(234, use_cuda=True)
            n = R.normal(size=size, avg=avg, std=std, dtype='float32',
                         nstreams=rng_mrg.guess_n_streams(size, warn=False))
            # well, it's really that this test w GPU doesn't make sense otw
            assert n.dtype == 'float32'
            f = theano.function(
                var_input,
                theano.Out(theano.sandbox.cuda.basic_ops.gpu_from_host(n),
                           borrow=True),
                mode=mode_with_gpu)
            # theano.printing.debugprint(f)
            sys.stdout.flush()
            gpu_out = numpy.asarray(f(*input))
            # print 'random?[:10]\n', gpu_out[0, 0:10]
            # print '----'
            sys.stdout.flush()
            basictest(f, steps_, const_size, target_avg=avg, target_std=std,
                      prefix='gpu mrg ', allow_01=True, inputs=input,
                      mean_rtol=rtol, std_tol=std_tol)
            # Need to allow some rounding error as their is float
            # computation that are done on the gpu vs cpu
            assert numpy.allclose(out, gpu_out, rtol=5e-6, atol=5e-6)

        # print ''
        # print 'ON CPU w NUMPY:'
        RR = theano.tensor.shared_randomstreams.RandomStreams(234)
        nn = RR.normal(size=size, avg=avg, std=std)
        ff = theano.function(var_input, nn)

        basictest(ff, steps_, const_size, target_avg=avg, target_std=std,
                  prefix='numpy ', allow_01=True, inputs=input,
                  mean_rtol=rtol)
def multi(grads, params, other_contexts):
    inputs = theano.gof.graph.inputs(grads)
    inputs = [
        inp for inp in inputs
        if not (isinstance(inp, T.Constant) or inp in params)
    ]

    symbolic_params = [_to_symbolic_var(p) for p in params]

    all_context_grads = []
    for ctx_i, context in enumerate(other_contexts):
        sharded_inputs = [
            theano.gpuarray.basic_ops.gpu_contiguous(
                inp[ctx_i::len(other_contexts) + 1])
            for inp in inputs
        ]
        xfer_inputs = [
            theano.gpuarray.as_gpuarray_variable(inp, context)
            for inp in sharded_inputs
        ]
        xfer_params = [
            theano.gpuarray.as_gpuarray_variable(sp, context)
            for sp in symbolic_params
        ]
        replacements = {
            x: xfer_x
            for x, xfer_x in zip(params + inputs, xfer_params + xfer_inputs)
        }

        # For whatever reason, theano.clone likes to make its own copies of the
        # replacement nodes we give it, so we need to dig into its generated
        # graph to grab the copies it made.
        for var in (xfer_params + xfer_inputs):
            var.name = str(uuid.uuid4())

        context_grad_graphs = [
            theano.clone(g, replace=replacements)
            for g in grads
        ]

        new_inputs = []
        for var in (xfer_params + xfer_inputs):
            for g in context_grad_graphs:
                matches = search(g, lambda x: x.name == var.name)
                if len(matches):
                    new_inputs.append(matches[0])
                    break
        if len(new_inputs) != len(xfer_params + xfer_inputs):
            raise Exception()

        grads_fn = theano.function(new_inputs, [
            theano.Out(g.transfer(context), borrow=True)
            for g in context_grad_graphs
        ])

        context_grads_op = ContextGradsOp(grads_fn, context, len(params))

        context_grads = context_grads_op(*(params + sharded_inputs))
        if not (isinstance(context_grads, list) or
                isinstance(context_grads, tuple)):
            context_grads = [context_grads]

        all_context_grads.append(context_grads)

    # context -> grad to grad -> context
    all_context_grads = zip(*all_context_grads)
    for i in xrange(len(all_context_grads)):
        all_context_grads[i] = [g.transfer(None) for g in all_context_grads[i]]

    # # Also schedule work on the main GPU
    # for i in xrange(len(all_context_grads)):
    #     sharded_inputs = [
    #         inp[len(other_contexts)::len(other_contexts)+1]
    #         for inp in inputs
    #     ]
    #     all_context_grads[i].append(
    #         theano.clone(
    #             grads[i],
    #             replace={inp: si for inp, si in zip(inputs, sharded_inputs)}
    #         )
    #     )

    avg_grads = [
        reduce(lambda a, b: a + b, gs) / float(len(gs))
        for gs in all_context_grads
    ]

    return avg_grads