def encoder(x, params, config):
    mb_size = config['mb_size']
    num_hidden = config['num_hidden']

    x = T.specify_shape(x, (128, 1, 28, 28))

    #c_1 = ConvPoolLayer(in_length=4000, batch_size=mb_size, stride=2, activation="relu", batch_norm=True, W=params['Wc_enc_1'], b=params['bc_enc_1'])
    #c_2 = ConvPoolLayer(in_length=399, batch_size=mb_size, stride=2, activation="relu", batch_norm=True, W=params['Wc_enc_2'], b=params['bc_enc_2'])
    #c_3 = ConvPoolLayer(in_length=38, batch_size=mb_size, stride=2, activation="relu", batch_norm=True, W=params['Wc_enc_3'], b=params['bc_enc_3'])

    h_out_1 = HiddenLayer(num_in=784, num_out=num_hidden, W=params['W_enc_1'], b=params['b_enc_1'],
                          activation='relu', batch_norm=True)
    h_out_2 = HiddenLayer(num_in=num_hidden, num_out=num_hidden, W=params['W_enc_2'], b=params['b_enc_2'],
                          activation='relu', batch_norm=True)

    print("x ndim", x.ndim)

    #c_1_value = T.specify_shape(c_1.output(x), (128, 96, 16, 16))
    #c_2_value = c_2.output(c_1_value)
    #c_3_value = c_3.output(c_2_value)

    h_out_1_value = T.specify_shape(h_out_1.output(x.flatten(2)), (128, num_hidden))
    h_out_2_value = h_out_2.output(h_out_1_value)

    return {'h': h_out_2_value}
def test_specify_shape_inplace(self):
    # test that specify_shape doesn't break inserting an inplace op
    dtype = self.dtype
    if dtype is None:
        dtype = theano.config.floatX

    rng = numpy.random.RandomState(utt.fetch_seed())
    a = numpy.asarray(rng.uniform(1, 2, [40, 40]), dtype=dtype)
    a = self.cast_value(a)
    a_shared = self.shared_constructor(a)
    b = numpy.asarray(rng.uniform(1, 2, [40, 40]), dtype=dtype)
    b = self.cast_value(b)
    b_shared = self.shared_constructor(b)
    s = numpy.zeros((40, 40), dtype=dtype)
    s = self.cast_value(s)
    s_shared = self.shared_constructor(s)
    f = theano.function([],
                        updates={s_shared: theano.dot(a_shared, b_shared) + s_shared})
    topo = f.maker.env.toposort()
    f()
    # [Gemm{inplace}(<TensorType(float64, matrix)>, 0.01, <TensorType(float64, matrix)>, <TensorType(float64, matrix)>, 2e-06)]
    if theano.config.mode != "FAST_COMPILE":
        assert sum([node.op.__class__.__name__ in ["Gemm", "GpuGemm", "StructuredDot"]
                    for node in topo]) == 1
        assert all(node.op == tensor.blas.gemm_inplace for node in topo
                   if isinstance(node.op, tensor.blas.Gemm))
        assert all(node.op.inplace for node in topo
                   if node.op.__class__.__name__ == "GpuGemm")
    # There is no inplace gemm for sparse
    # assert all(node.op.inplace for node in topo if node.op.__class__.__name__ == "StructuredDot")
    s_shared_specify = tensor.specify_shape(s_shared,
                                            s_shared.get_value(borrow=True).shape)

    # now test with the specify shape op in the output
    f = theano.function([], s_shared.shape,
                        updates={s_shared: theano.dot(a_shared, b_shared) + s_shared_specify})
    topo = f.maker.env.toposort()
    shp = f()
    assert numpy.all(shp == (40, 40))
    if theano.config.mode != "FAST_COMPILE":
        assert sum([node.op.__class__.__name__ in ["Gemm", "GpuGemm", "StructuredDot"]
                    for node in topo]) == 1
        assert all(node.op == tensor.blas.gemm_inplace for node in topo
                   if isinstance(node.op, tensor.blas.Gemm))
        assert all(node.op.inplace for node in topo
                   if node.op.__class__.__name__ == "GpuGemm")

    # now test with the specify shape op in the inputs and outputs
    a_shared = tensor.specify_shape(a_shared,
                                    a_shared.get_value(borrow=True).shape)
    b_shared = tensor.specify_shape(b_shared,
                                    b_shared.get_value(borrow=True).shape)
    f = theano.function([], s_shared.shape,
                        updates={s_shared: theano.dot(a_shared, b_shared) + s_shared_specify})
    topo = f.maker.env.toposort()
    shp = f()
    assert numpy.all(shp == (40, 40))
    if theano.config.mode != "FAST_COMPILE":
        assert sum([node.op.__class__.__name__ in ["Gemm", "GpuGemm", "StructuredDot"]
                    for node in topo]) == 1
        assert all(node.op == tensor.blas.gemm_inplace for node in topo
                   if isinstance(node.op, tensor.blas.Gemm))
        assert all(node.op.inplace for node in topo
                   if node.op.__class__.__name__ == "GpuGemm")
def bench_deep1000(variant=True):
    name = "mlp_784_1000_1000_1000_10_b" + str(GlobalBenchReporter.batch_size)
    name += "_" + config.linker

    w0 = shared(rand(inputs, 1000) * numpy.sqrt(6 / (inputs + 1000)), name='w0')
    b0 = shared(zeros(1000), name='b0')
    w1 = shared(rand(1000, 1000) * numpy.sqrt(6 / (1000 + 1000)), name='w1')
    b1 = shared(zeros(1000), name='b1')
    w2 = shared(rand(1000, 1000) * numpy.sqrt(6 / (1000 + 1000)), name='w2')
    b2 = shared(zeros(1000), name='b2')
    v = shared(zeros(1000, outputs), name='v')
    c = shared(zeros(outputs), name='c')

    if GlobalBenchReporter.batch_size == 1:
        sx_ = sx.flatten()
        sy_ = specify_shape(sy, [1])
        ssx_ = ssx.flatten()
        ssy_ = specify_shape(ssy, [1])
    else:
        sx_ = sx
        sy_ = sy
        ssx_ = ssx
        ssy_ = ssy

    params = [w0, b0, w1, b1, w2, b2, v, c]

    h0 = tanh(dot(sx_, w0) + b0)
    h1 = tanh(dot(h0, w1) + b1)
    h2 = tanh(dot(h1, w2) + b2)
    p_y_given_x = softmax(dot(h2, v) + c)
    nll = -log(p_y_given_x)[arange(sy_.shape[0]), sy_]
    cost = nll.mean()

    gparams = grad(cost, params)

    train = function([si, nsi], cost,
                     updates=[(p, p - lr * gp) for p, gp in zip(params, gparams)],
                     name=name)
    GlobalBenchReporter.eval_model(train, name)
    if not variant:
        return

    # Version with no inputs
    h0 = tanh(dot(ssx_, w0) + b0)
    h1 = tanh(dot(h0, w1) + b1)
    h2 = tanh(dot(h1, w2) + b2)
    p_y_given_x = softmax(dot(h2, v) + c)
    nll = -log(p_y_given_x)[arange(ssy_.shape[0]), ssy_]
    cost = nll.mean()

    gparams = grad(cost, params)

    train2 = function([], cost,
                      updates=[(p, p - lr * gp)
                               for p, gp in zip(params, gparams)] + [(ssi, ssi + snsi)],
                      name=name)
    snsi.set_value(GlobalBenchReporter.batch_size)

    GlobalBenchReporter.bypass_eval_model(train2, name, init_to_zero=ssi)
def __init__(self, num_hidden, num_features, seq_length, mb_size, tf_states, rf_states):
    tf_states = T.specify_shape(tf_states, (seq_length, mb_size, num_features))
    rf_states = T.specify_shape(rf_states, (seq_length, mb_size, num_features))

    hidden_state_features = T.specify_shape(T.concatenate([tf_states, rf_states], axis=1),
                                            (seq_length, mb_size * 2, num_features))

    gru_params_1 = init_tparams(param_init_gru(None, {}, prefix="gru1", dim=num_hidden, nin=num_features))
    #gru_params_2 = init_tparams(param_init_gru(None, {}, prefix="gru2", dim=num_hidden, nin=num_hidden + num_features))
    #gru_params_3 = init_tparams(param_init_gru(None, {}, prefix="gru3", dim=num_hidden, nin=num_hidden + num_features))

    gru_1_out = gru_layer(gru_params_1, hidden_state_features, None, prefix='gru1')[0]
    #gru_2_out = gru_layer(gru_params_2, T.concatenate([gru_1_out, hidden_state_features], axis=2), None, prefix='gru2', backwards=True)[0]
    #gru_3_out = gru_layer(gru_params_3, T.concatenate([gru_2_out, hidden_state_features], axis=2), None, prefix='gru3')[0]

    final_out_recc = T.specify_shape(T.mean(gru_1_out, axis=0), (mb_size * 2, num_hidden))

    h_out_1 = DenseLayer((mb_size * 2, num_hidden), num_units=num_hidden,
                         nonlinearity=lasagne.nonlinearities.rectify)
    #h_out_2 = DenseLayer((mb_size * 2, num_hidden), num_units=num_hidden, nonlinearity=lasagne.nonlinearities.rectify)
    #h_out_3 = DenseLayer((mb_size * 2, num_hidden), num_units=num_hidden, nonlinearity=lasagne.nonlinearities.rectify)
    h_out_4 = DenseLayer((mb_size * 2, num_hidden), num_units=1, nonlinearity=None)

    h_out_1_value = h_out_1.get_output_for(final_out_recc)
    h_out_4_value = h_out_4.get_output_for(h_out_1_value)

    raw_y = h_out_4_value
    #raw_y = T.clip(h_out_4_value, -10.0, 10.0)

    classification = T.nnet.sigmoid(raw_y)

    # tf comes before rf.
    p_real = classification[:mb_size]
    p_gen = classification[mb_size:]

    #bce = lambda r, t: t * T.nnet.softplus(-r) + (1 - t) * (r + T.nnet.softplus(-r))

    self.d_cost_real = bce(p_real, 0.9 * T.ones(p_real.shape)).mean()
    self.d_cost_gen = bce(p_gen, 0.1 + T.zeros(p_gen.shape)).mean()
    self.g_cost_d = bce(p_gen, 0.9 * T.ones(p_gen.shape)).mean()

    self.d_cost = self.d_cost_real + self.d_cost_gen
    self.g_cost = self.g_cost_d

    self.classification = classification

    self.params = []
    self.params += lasagne.layers.get_all_params(h_out_4, trainable=True)
    #self.params += lasagne.layers.get_all_params(h_out_3, trainable=True)
    #self.params += lasagne.layers.get_all_params(h_out_2, trainable=True)
    self.params += lasagne.layers.get_all_params(h_out_1, trainable=True)

    self.params += gru_params_1.values()
    #self.params += gru_params_2.values()
    #self.params += gru_params_3.values()

    self.accuracy = (T.mean(T.eq(T.ones(p_real.shape).flatten(), T.gt(p_real, 0.5).flatten())) +
                     T.mean(T.eq(T.ones(p_gen.shape).flatten(), T.lt(p_gen, 0.5).flatten())))
def bench_logreg(variant=True):
    name = "mlp_784_10_b" + str(GlobalBenchReporter.batch_size)
    name += "_" + config.linker

    v = shared(zeros(outputs, inputs), name='v')
    c = shared(zeros(outputs), name='c')

    if GlobalBenchReporter.batch_size == 1:
        sx_ = sx.flatten()
        sy_ = specify_shape(sy, [1])
        ssx_ = ssx.flatten()
        ssy_ = specify_shape(ssy, [1])
    else:
        sx_ = sx
        sy_ = sy
        ssx_ = ssx
        ssy_ = ssy

    #
    # Note on the transposed-ness of v: for some reason, this data
    # layout is faster than the non-transposed orientation.
    # The change doesn't make much difference in the deeper models,
    # but in this case it was more than twice as fast.
    #
    p_y_given_x = softmax(dot(sx_, v.T) + c)
    nll = -log(p_y_given_x)[arange(sy_.shape[0]), sy_]
    cost = nll.mean()

    gv, gc = grad(cost, [v, c])
    #theano.printing.debugprint(grad(cost, [v, c]), file=open('foo', 'wb'))

    train = function([si, nsi], [],
                     updates={v: v - lr * gv, c: c - lr * gc},
                     name=name)
    # theano.printing.debugprint(train, print_type=True)
    GlobalBenchReporter.eval_model(train, name)
    if not variant:
        return

    # Version with no inputs
    snsi.set_value(GlobalBenchReporter.batch_size)
    p_y_given_x = softmax(dot(ssx_, v.T) + c)
    nll = -log(p_y_given_x)[arange(ssy_.shape[0]), ssy_]
    cost = nll.mean()

    gv, gc = grad(cost, [v, c])

    train2 = function([], [],
                      updates={v: v - lr * gv, c: c - lr * gc, ssi: ssi + snsi},
                      name=name)
    GlobalBenchReporter.bypass_eval_model(train2, name, init_to_zero=ssi)
def bench_mlp_500(variant=True):
    name = "mlp_784_500_10_b" + str(GlobalBenchReporter.batch_size)
    name += "_" + config.linker

    HUs = 500

    w = shared(rand(HUs, inputs) * numpy.sqrt(6 / (inputs + HUs)), name='w')
    b = shared(zeros(HUs), name='b')
    v = shared(zeros(outputs, HUs), name='v')
    c = shared(zeros(outputs), name='c')

    if GlobalBenchReporter.batch_size == 1:
        sx_ = sx.flatten()
        sy_ = specify_shape(sy, [1])
        ssx_ = ssx.flatten()
        ssy_ = specify_shape(ssy, [1])
    else:
        sx_ = sx
        sy_ = sy
        ssx_ = ssx
        ssy_ = ssy

    p_y_given_x = softmax(dot(tanh(dot(sx_, w.T) + b), v.T) + c)
    nll = -log(p_y_given_x)[arange(sy_.shape[0]), sy_]
    cost = nll.mean()

    gw, gb, gv, gc = grad(cost, [w, b, v, c])

    train = function([si, nsi], cost,
                     updates={w: w - lr * gw,
                              b: b - lr * gb,
                              v: v - lr * gv,
                              c: c - lr * gc},
                     name=name)
    GlobalBenchReporter.eval_model(train, name)
    if not variant:
        return

    # Version with no inputs
    snsi.set_value(GlobalBenchReporter.batch_size)
    p_y_given_x = softmax(dot(tanh(dot(ssx_, w.T) + b), v.T) + c)
    nll = -log(p_y_given_x)[arange(ssy_.shape[0]), ssy_]
    cost = nll.mean()

    gw, gb, gv, gc = grad(cost, [w, b, v, c])

    train2 = function([], cost,
                      updates={w: w - lr * gw,
                               b: b - lr * gb,
                               v: v - lr * gv,
                               c: c - lr * gc,
                               ssi: ssi + snsi},
                      name=name)
    GlobalBenchReporter.bypass_eval_model(train2, name, init_to_zero=ssi)
def output(self, input):
    W_shuffled = self.W.dimshuffle(3, 0, 1, 2)  # c01b to bc01

    print("input ndim", input.ndim)

    conv_out = dnn.dnn_conv(img=input,
                            kerns=W_shuffled,
                            subsample=(self.stride, self.stride),
                            border_mode=self.padsize)

    conv_out = conv_out + self.b.dimshuffle('x', 0, 'x', 'x')

    if self.batch_norm:
        conv_out = (conv_out - T.mean(conv_out, axis=(0, 2, 3), keepdims=True)) / \
                   (1.0 + T.std(conv_out, axis=(0, 2, 3), keepdims=True))
        conv_out = conv_out * T.addbroadcast(self.bn_std, 0, 2, 3) + \
                   T.addbroadcast(self.bn_mean, 0, 2, 3)

    self.out_store = conv_out

    if self.activation == "relu":
        self.out = T.maximum(0.0, conv_out)
    elif self.activation == "tanh":
        self.out = T.tanh(conv_out)
    elif self.activation is None:
        self.out = conv_out

    return T.specify_shape(self.out, (self.batch_size,
                                      self.out_channels,
                                      self.in_length / self.stride,
                                      self.in_length / self.stride))
def encoder(x, params, config):
    mb_size = config['mb_size']
    num_hidden = config['num_hidden']

    c_1 = ConvPoolLayer(in_channels=1, out_channels=128, in_length=4000, batch_size=mb_size,
                        kernel_len=20, stride=10, activation="relu", batch_norm=True,
                        W=params['Wc_enc_1'], b=params['bc_enc_1'])
    c_2 = ConvPoolLayer(in_channels=128, out_channels=256, in_length=399, batch_size=mb_size,
                        kernel_len=20, stride=10, activation="relu", batch_norm=True,
                        W=params['Wc_enc_2'], b=params['bc_enc_2'])
    c_3 = ConvPoolLayer(in_channels=256, out_channels=512, in_length=38, batch_size=mb_size,
                        kernel_len=10, stride=5, activation="relu", batch_norm=True,
                        W=params['Wc_enc_3'], b=params['bc_enc_3'])

    h_out_1 = HiddenLayer(num_in=512 * 6, num_out=num_hidden, W=params['W_enc_1'], b=params['b_enc_1'],
                          activation='relu', batch_norm=True)
    h_out_2 = HiddenLayer(num_in=num_hidden, num_out=num_hidden, W=params['W_enc_2'], b=params['b_enc_2'],
                          activation='relu', batch_norm=True)

    print("x ndim", x.ndim)

    c_1_value = T.specify_shape(c_1.output(x.reshape((128, 1, 4000))), (128, 128, 399))
    c_2_value = c_2.output(c_1_value)
    c_3_value = c_3.output(c_2_value)

    h_out_1_value = h_out_1.output(c_3_value.flatten(2))
    h_out_2_value = h_out_2.output(h_out_1_value)

    return {'h': h_out_2_value}
def test_specify_shape_partial(self):
    dtype = self.dtype
    if dtype is None:
        dtype = theano.config.floatX

    rng = np.random.RandomState(utt.fetch_seed())
    x1_1 = np.asarray(rng.uniform(1, 2, [4, 2]), dtype=dtype)
    x1_1 = self.cast_value(x1_1)
    x1_2 = np.asarray(rng.uniform(1, 2, [4, 2]), dtype=dtype)
    x1_2 = self.cast_value(x1_2)
    x2 = np.asarray(rng.uniform(1, 2, [5, 2]), dtype=dtype)
    x2 = self.cast_value(x2)

    # Test that we can replace with values of the same shape
    x1_shared = self.shared_constructor(x1_1)
    x1_specify_shape = tensor.specify_shape(x1_shared,
                                            (tensor.as_tensor_variable(x1_1.shape[0]),
                                             x1_shared.shape[1]))
    x1_shared.set_value(x1_2)
    assert np.allclose(self.ref_fct(x1_shared.get_value(borrow=True)),
                       self.ref_fct(x1_2))
    shape_op_fct = theano.function([], x1_shared.shape)
    topo = shape_op_fct.maker.fgraph.toposort()
    shape_op_fct()
    if theano.config.mode != 'FAST_COMPILE':
        assert len(topo) == 3
        assert isinstance(topo[0].op, tensor.opt.Shape_i)
        assert isinstance(topo[1].op, tensor.opt.Shape_i)
        assert isinstance(topo[2].op, tensor.opt.MakeVector)

    # Test that we forward the input
    specify_shape_fct = theano.function([], x1_specify_shape)
    specify_shape_fct()
    # theano.printing.debugprint(specify_shape_fct)
    assert np.all(self.ref_fct(specify_shape_fct()) == self.ref_fct(x1_2))
    topo_specify = specify_shape_fct.maker.fgraph.toposort()
    if theano.config.mode != 'FAST_COMPILE':
        assert len(topo_specify) == 4

    # Test that we put the shape info into the graph
    shape_constant_fct = theano.function([], x1_specify_shape.shape)
    # theano.printing.debugprint(shape_constant_fct)
    assert np.all(shape_constant_fct() == shape_op_fct())
    topo_cst = shape_constant_fct.maker.fgraph.toposort()
    if theano.config.mode != 'FAST_COMPILE':
        assert len(topo_cst) == 2

    # Test that we can replace with values of a different shape;
    # that will raise an error in some cases, but not all
    x1_shared.set_value(x2)
    self.assertRaises(AssertionError, specify_shape_fct)

    # No assertion will be raised as the Op is removed from the graph
    if theano.config.mode not in ['FAST_COMPILE', 'DebugMode', 'DEBUG_MODE']:
        shape_constant_fct()
    else:
        self.assertRaises(AssertionError, shape_constant_fct)
def shapely_tensor(name, x, dtype='float64'):
    '''Return SYMBOLIC tensor with the same dimensions and size as input.'''
    if isinstance(x, type(0)):
        return Th.dscalar(name)
    if isinstance(x, type(numpy.array([]))):
        dtensor_x = Th.TensorType(dtype, (False,) * x.ndim)
        return Th.specify_shape(dtensor_x(name), x.shape)
    raise TypeError('shapely_tensor expects a scalar or numpy ndarray')
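# A hedged usage sketch for shapely_tensor above, not taken from the original
# source: it assumes `Th` is `theano.tensor` and `numpy` is imported as in that
# snippet, and the names `my_params`, `w`, and `s` are illustrative only.
import numpy

my_params = numpy.zeros((3, 4))
w = shapely_tensor('w', my_params)   # symbolic 2-d tensor whose shape is pinned to (3, 4)
s = shapely_tensor('s', 0)           # plain double-precision scalar
print(w.ndim, s.ndim)                # 2 0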
def output(self, x):
    # Nearest-neighbour upsampling along the last axis: each input column is
    # written to both adjacent output columns.
    y = T.alloc(0.0, self.output_shape[0], self.output_shape[1], self.output_shape[2])
    y = T.set_subtensor(y[:, :, 0::2], x)
    y = T.set_subtensor(y[:, :, 1::2], x)
    return T.specify_shape(y, self.output_shape)
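# A standalone sketch of the same upsampling idea (illustrative shapes, not
# taken from the original class): each input column is copied into the two
# neighbouring output columns, and specify_shape pins the static output shape.
import numpy
import theano
import theano.tensor as T

in_shape = (4, 3, 5)                        # (batch, channels, length)
out_shape = (4, 3, 10)                      # length doubled
x = T.tensor3('x')
y = T.alloc(0.0, *out_shape)
y = T.set_subtensor(y[:, :, 0::2], x)
y = T.set_subtensor(y[:, :, 1::2], x)
y = T.specify_shape(y, out_shape)

f = theano.function([x], y)
print(f(numpy.ones(in_shape, dtype=x.dtype)).shape)   # (4, 3, 10)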
def discriminator(x, z, params, mb_size, num_hidden, num_latent):
    import random as rng
    srng = theano.tensor.shared_randomstreams.RandomStreams(420)

    c_1 = ConvPoolLayer(in_channels=1, out_channels=128, in_length=4000, batch_size=mb_size,
                        kernel_len=20, stride=10, activation="relu", batch_norm=False,
                        W=params['W_c_1'], b=params['b_c_1'])
    c_2 = ConvPoolLayer(in_channels=128, out_channels=256, in_length=399, batch_size=mb_size,
                        kernel_len=20, stride=10, activation="relu", batch_norm=False,
                        W=params['W_c_2'], b=params['b_c_2'])
    c_3 = ConvPoolLayer(in_channels=256, out_channels=512, in_length=38, batch_size=mb_size,
                        kernel_len=10, stride=5, activation="relu", batch_norm=False,
                        W=params['W_c_3'], b=params['b_c_3'])

    c_h_1 = HiddenLayer(num_in=6 * 512, num_out=num_hidden, W=params['W_ch_1'], b=params['b_ch_1'],
                        activation='relu', batch_norm=False)

    h_out_1 = HiddenLayer(num_in=num_hidden + num_latent, num_out=num_hidden, activation='relu',
                          batch_norm=False, W=params['W_disc_1'], b=params['b_disc_1'])
    h_out_2 = HiddenLayer(num_in=num_hidden, num_out=num_hidden, activation='relu',
                          batch_norm=False, W=params['W_disc_2'], b=params['b_disc_2'])
    h_out_3 = HiddenLayer(num_in=num_hidden, num_out=num_hidden, activation='relu',
                          batch_norm=False, W=params['W_disc_3'], b=params['b_disc_3'])
    h_out_4 = HiddenLayer(num_in=num_hidden, num_out=1, activation=None,
                          batch_norm=False, W=params['W_disc_4'], b=params['b_disc_4'])

    c_1_value = T.specify_shape(c_1.output(dropout(x, 0.8).reshape((128, 1, 4000))), (128, 128, 399))
    c_2_value = T.specify_shape(c_2.output(c_1_value), (128, 256, 38))
    c_3_value = T.specify_shape(c_3.output(c_2_value), (128, 512, 6))

    c_h_1_value = c_h_1.output(c_3_value.flatten(2))

    h_out_1_value = dropout(h_out_1.output(T.concatenate([z, c_h_1_value], axis=1)))
    h_out_2_value = dropout(h_out_2.output(h_out_1_value), 0.2)
    h_out_3_value = dropout(h_out_3.output(h_out_2_value), 0.2)
    h_out_4_value = h_out_4.output(h_out_3_value)

    raw_y = h_out_4_value
    classification = T.nnet.sigmoid(raw_y)

    results = {'c': classification}

    return results
def output(self, input):
    input = T.specify_shape(input, (self.batch_size, self.in_channels, self.in_length))

    conv_out = conv1d_mc0(input, self.W,
                          image_shape=(self.batch_size, self.in_channels, self.in_length),
                          filter_shape=(self.out_channels, self.in_channels, self.filter_length),
                          subsample=(self.stride,))

    # was mb, filters, x, y
    # now mb, filters, x

    if self.batch_norm:
        conv_out = (conv_out - T.mean(conv_out, axis=(0, 2), keepdims=True)) / \
                   (1.0 + T.std(conv_out, axis=(0, 2), keepdims=True))

    conv_out += self.b.dimshuffle('x', 0, 'x')

    if self.activation == "relu":
        self.out = T.maximum(0.0, conv_out)
    elif self.activation == "tanh":
        self.out = T.tanh(conv_out)
    elif self.activation is None:
        self.out = conv_out

    return self.out
def get_network(config, params, sequence, do_sample):
    mb_size = config['mb_size']
    seq_length = config['seq_length']
    num_hidden = config['num_hidden']

    sequence_ver = T.specify_shape(sequence * 1.0, (seq_length, mb_size))

    initial_states = theano.shared(np.zeros(shape=(mb_size, 2 * config['num_hidden'])).astype('float32'))
    initial_output = theano.shared(np.zeros(shape=(mb_size,)).astype('float32'))
    initial_loss = theano.shared(np.zeros(shape=(mb_size,)).astype('float32'))

    sequence_features = T.specify_shape(sequence_ver[:-1, :], (seq_length - 1, mb_size))
    sequence_target = T.specify_shape(sequence_ver[1:, :], (seq_length - 1, mb_size))

    use_samples = T.specify_shape(do_sample, (seq_length - 1,))

    results, _ = theano.scan(fn=lambda *inp: rnn_one_step(config, params, *inp),
                             sequences=[sequence_features, sequence_target, use_samples],
                             outputs_info=[initial_states, initial_output, initial_loss],
                             non_sequences=[],
                             n_steps=seq_length - 1)

    results[0] = T.specify_shape(results[0], (seq_length - 1, mb_size, 2 * num_hidden))
    results[1] = T.specify_shape(results[1], (seq_length - 1, mb_size))
    results[2] = T.specify_shape(results[2], (seq_length - 1, mb_size))

    return {'states': results[0], 'output': results[1], 'loss': results[2]}
# Hmm... join does not seem to be documented anywhere
z = T.join(0, x, y)

# a uniform distribution over 0, 1 in a 5x4 tensor
xv = np.random.rand(5, 4)
yv = np.random.rand(3, 3)

f = theano.function([x, y], z.shape)
theano.printing.debugprint(f)

# should lead to an error of mismatched indices but does not
print(f(xv, yv))

# instead, compute values and not just the shape,
# and an error is thrown
f = theano.function([x, y], z)
theano.printing.debugprint(f)
#print f(xv, yv)

# specifying an exact shape
x = T.matrix('x')
x_specify_shape = T.specify_shape(x, (2, 2))
f = theano.function([x], (x_specify_shape**2).shape)
theano.printing.debugprint(f)
def test_specifyshape(self):
    self.check_rop_lop(tensor.specify_shape(self.x, self.in_shape),
                       self.in_shape)
def rnn_one_step(config, params, observed_sequence_last, observed_sequence_current, use_samples,
                 last_states, last_outputs, last_loss):
    mb_size = config['mb_size']
    num_hidden = config['num_hidden']

    last_states = T.specify_shape(last_states, (config['mb_size'], 2 * config['num_hidden']))
    last_outputs = T.specify_shape(last_outputs, (config['mb_size'],))

    obs_last = T.specify_shape(observed_sequence_last, (mb_size,)).reshape((mb_size, 1))
    obs_curr = T.specify_shape(observed_sequence_current, (mb_size,))

    obs_use = theano.ifelse.ifelse(use_samples, last_outputs.reshape((mb_size, 1)), obs_last)

    last_states_1 = last_states[:, 0:1024]
    last_states_2 = last_states[:, 1024:2048]

    next_states_1 = T.specify_shape(gru_layer(params, state_below=obs_use, options=None,
                                              prefix='gru1', mask=None, one_step=True,
                                              init_state=last_states_1, backwards=False)[0],
                                    (mb_size, num_hidden))
    next_states_2 = T.specify_shape(gru_layer(params, state_below=next_states_1, options=None,
                                              prefix='gru2', mask=None, one_step=True,
                                              init_state=last_states_2, backwards=False)[0],
                                    (mb_size, num_hidden))

    h1 = T.specify_shape(fflayer(params, next_states_2, options=None, prefix='ff_h1',
                                 activ='lambda x: tensor.maximum(x,0.0)'),
                         (mb_size, num_hidden))
    h2 = T.specify_shape(fflayer(params, h1, options=None, prefix='ff_h2',
                                 activ='lambda x: tensor.maximum(x,0.0)'),
                         (mb_size, num_hidden))

    y = T.specify_shape(fflayer(params, h2, options=None, prefix='ff_1',
                                activ='lambda x: x').flatten(),
                        (mb_size,))
    #y = T.specify_shape(T.sum(next_states, axis=1), (mb_size,))

    loss = T.sqr(y - obs_curr)

    obs_curr = T.specify_shape(observed_sequence_current, (mb_size,))

    next_outputs = y
    next_states = T.specify_shape(T.concatenate([next_states_1, next_states_2], axis=1),
                                  (mb_size, num_hidden * 2))

    return next_states, next_outputs, loss
def test_specify_shape_inplace(self):
    # test that specify_shape doesn't break inserting an inplace op

    dtype = self.dtype
    if dtype is None:
        dtype = theano.config.floatX

    rng = np.random.RandomState(utt.fetch_seed())
    a = np.asarray(rng.uniform(1, 2, [40, 40]), dtype=dtype)
    a = self.cast_value(a)
    a_shared = self.shared_constructor(a)
    b = np.asarray(rng.uniform(1, 2, [40, 40]), dtype=dtype)
    b = self.cast_value(b)
    b_shared = self.shared_constructor(b)
    s = np.zeros((40, 40), dtype=dtype)
    s = self.cast_value(s)
    s_shared = self.shared_constructor(s)
    f = theano.function(
        [],
        updates=[(s_shared, theano.tensor.dot(a_shared, b_shared) + s_shared)],
    )
    topo = f.maker.fgraph.toposort()
    f()
    # [Gemm{inplace}(<TensorType(float64, matrix)>, 0.01, <TensorType(float64, matrix)>, <TensorType(float64, matrix)>, 2e-06)]
    if theano.config.mode != "FAST_COMPILE":
        assert (sum([
            node.op.__class__.__name__ in ["Gemm", "GpuGemm", "StructuredDot"]
            for node in topo
        ]) == 1)
        assert all(node.op == tensor.blas.gemm_inplace for node in topo
                   if isinstance(node.op, tensor.blas.Gemm))
        assert all(node.op.inplace for node in topo
                   if node.op.__class__.__name__ == "GpuGemm")
    # There is no inplace gemm for sparse
    # assert all(node.op.inplace for node in topo if node.op.__class__.__name__ == "StructuredDot")
    s_shared_specify = tensor.specify_shape(
        s_shared, s_shared.get_value(borrow=True).shape)

    # now test with the specify shape op in the output
    f = theano.function(
        [],
        s_shared.shape,
        updates=[(s_shared,
                  theano.tensor.dot(a_shared, b_shared) + s_shared_specify)],
    )
    topo = f.maker.fgraph.toposort()
    shp = f()
    assert np.all(shp == (40, 40))
    if theano.config.mode != "FAST_COMPILE":
        assert (sum([
            node.op.__class__.__name__ in ["Gemm", "GpuGemm", "StructuredDot"]
            for node in topo
        ]) == 1)
        assert all(node.op == tensor.blas.gemm_inplace for node in topo
                   if isinstance(node.op, tensor.blas.Gemm))
        assert all(node.op.inplace for node in topo
                   if node.op.__class__.__name__ == "GpuGemm")

    # now test with the specify shape op in the inputs and outputs
    a_shared = tensor.specify_shape(
        a_shared, a_shared.get_value(borrow=True).shape)
    b_shared = tensor.specify_shape(
        b_shared, b_shared.get_value(borrow=True).shape)
    f = theano.function(
        [],
        s_shared.shape,
        updates=[(s_shared,
                  theano.tensor.dot(a_shared, b_shared) + s_shared_specify)],
    )
    topo = f.maker.fgraph.toposort()
    shp = f()
    assert np.all(shp == (40, 40))
    if theano.config.mode != "FAST_COMPILE":
        assert (sum([
            node.op.__class__.__name__ in ["Gemm", "GpuGemm", "StructuredDot"]
            for node in topo
        ]) == 1)
        assert all(node.op == tensor.blas.gemm_inplace for node in topo
                   if isinstance(node.op, tensor.blas.Gemm))
        assert all(node.op.inplace for node in topo
                   if node.op.__class__.__name__ == "GpuGemm")
def local_gpua_specifyShape(node):
    if isinstance(node.inputs[0].type, GpuArrayType):
        return
    inp = [gpu_from_host(node.inputs[0])] + node.inputs[1:]
    return tensor.specify_shape(*inp)
def local_gpua_specifyShape(node, context_name):
    if isinstance(node.inputs[0].type, GpuArrayType):
        return
    inp = [as_gpuarray_variable(node.inputs[0], context_name)]
    inp += node.inputs[1:]
    return tensor.specify_shape(*inp)
def test_optimize_xent_vector4(self):
    # Same as test_optimize_xent_vector2, but y is the result of
    # a "specify_shape" that indicates its length is 1, so the
    # constant-folding of arange(y.shape[0]) happens before the xent
    # optimization
    verbose = 0
    mode = theano.compile.mode.get_default_mode()
    if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
        mode = 'FAST_RUN'
    rng = numpy.random.RandomState(utt.fetch_seed())
    x_val = rng.randn(5).astype(config.floatX)
    b_val = rng.randn(5).astype(config.floatX)
    y_val = numpy.asarray([2])

    x = T.vector('x')
    b = T.vector('b')
    y_ = T.lvector('y_')
    y = T.specify_shape(y_, (1,))

    # Test that a biased softmax is optimized correctly
    bias_expressions = [
        T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
        -T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
        -T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
        T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]

    for expr in bias_expressions:
        f = theano.function([x, b, y_], expr, mode=mode)
        if verbose:
            printing.debugprint(f)
        try:
            ops = [node.op for node in f.maker.fgraph.toposort()]
            # [big_op, sum, dim_shuffle, specify_shape]
            assert len(ops) <= 4
            assert crossentropy_softmax_argmax_1hot_with_bias in ops
            assert not [1 for o in ops
                        if isinstance(o, T.AdvancedSubtensor)]
            f(x_val, b_val, y_val)
        except Exception:
            theano.printing.debugprint(f)
            raise

        backup = config.warn.sum_div_dimshuffle_bug
        config.warn.sum_div_dimshuffle_bug = False
        try:
            g = theano.function([x, b, y], T.grad(expr, x), mode=mode)
        finally:
            config.warn.sum_div_dimshuffle_bug = backup

        if verbose:
            printing.debugprint(g)
        try:
            ops = [node.op for node in g.maker.fgraph.toposort()]
            assert len(ops) <= 6
            assert crossentropy_softmax_1hot_with_bias_dx in ops
            assert softmax_with_bias in ops
            assert softmax_grad not in ops
            g(x_val, b_val, y_val)
        except Exception:
            theano.printing.debugprint(g)
            raise
import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne.nonlinearities import softmax, linear

# ================================================
# Demonstration of how to compute the missing term in the posterior recurrent
# equation

xsamp = T.matrix('x')
specify_shape = T.specify_shape(xsamp, (3, 2))
Nsamps, _ = xsamp.shape[0], xsamp.shape[1]
xDim = 2

# Define the NN.
NNEvolve = lasagne.layers.InputLayer((None, xDim), name='IL')
NNEvolve = lasagne.layers.DenseLayer(NNEvolve, 30, nonlinearity=softmax,
                                     W=lasagne.init.Orthogonal(), name='_HL1')
NNEvolve = lasagne.layers.DenseLayer(NNEvolve, xDim**2, nonlinearity=linear,
                                     W=lasagne.init.Uniform(0.9), name='_OL')

B = lasagne.layers.get_output(NNEvolve, xsamp)
B = T.sum(xsamp**2)
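# A hedged follow-on sketch, not part of the original demo: compiling on the
# `specify_shape` variable defined above keeps its (3, 2) assertion in the
# graph, so a wrongly shaped sample batch fails at run time. The names `f_B`
# and `xv` are illustrative.
f_B = theano.function([xsamp], [B, specify_shape.sum()])
xv = np.random.rand(3, 2).astype(xsamp.dtype)
print(f_B(xv))                       # works: xv matches the declared (3, 2) shape
try:
    f_B(np.random.rand(5, 2).astype(xsamp.dtype))
except Exception as err:             # SpecifyShape rejects the mismatched batch
    print("shape assertion triggered:", err)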
def __init__(self, n_hidden: int, datafile: str, pathway_name: str,
             par_modulation_scale: float = 1 / 2):
    """
    loads the mechanistic model as theano operator with loss as output and
    decoder output as input

    :param datafile:
        path to data csv

    :param pathway_name:
        name of pathway to use for model

    :param n_hidden:
        number of nodes in the hidden layer of the encoder

    :param par_modulation_scale:
        currently this parameter only influences the strength of l2
        regularization on the inflate layer (the respective gaussian prior
        has its standard deviation defined based on the value of this
        parameter). For bounded inflate functions, this parameter is also
        intended to rescale the inputs accordingly.
    """
    self.data_name = os.path.splitext(os.path.basename(datafile))[0]
    self.pathway_name = pathway_name

    self.par_modulation_scale = par_modulation_scale

    self.petab_importer = load_petab(datafile, 'pw_' + pathway_name,
                                     par_modulation_scale)

    self.pypesto_subproblem = self.petab_importer.create_problem()

    self.n_samples = len(self.petab_importer.petab_problem.condition_df)
    self.n_visible = len(self.petab_importer.petab_problem.observable_df)
    self.n_model_inputs = int(sum(
        name.startswith(MODEL_FEATURE_PREFIX)
        for name in self.pypesto_subproblem.x_names
    ) / self.n_samples)
    self.n_kin_params = \
        self.pypesto_subproblem.dim - self.n_model_inputs * self.n_samples

    input_data = self.petab_importer.petab_problem.measurement_df.pivot(
        index=petab.SIMULATION_CONDITION_ID,
        columns=petab.OBSERVABLE_ID,
        values=petab.MEASUREMENT
    )
    # zero center input data, this is equivalent to estimating biases
    # for linear autoencoders
    # https://link.springer.com/article/10.1007/BF00332918
    # https://arxiv.org/pdf/1901.08168.pdf
    input_data -= input_data.mean()
    self.sample_names = list(input_data.index)

    super().__init__(input_data=input_data.values, n_hidden=n_hidden,
                     n_params=self.n_model_inputs)

    # set tolerances
    self.pypesto_subproblem.objective._objectives[0].amici_solver\
        .setAbsoluteTolerance(1e-12)
    self.pypesto_subproblem.objective._objectives[0].amici_solver\
        .setRelativeTolerance(1e-10)
    self.pypesto_subproblem.objective._objectives[0].amici_solver\
        .setAbsoluteToleranceSteadyState(1e-10)
    self.pypesto_subproblem.objective._objectives[0].amici_solver\
        .setRelativeToleranceSteadyState(1e-8)

    # define model theano op
    self.loss = TheanoLogProbability(self.pypesto_subproblem)

    # these are the kinetic parameters that are shared across all samples
    self.kin_pars = tt.specify_shape(tt.vector('kinetic_parameters'),
                                     (self.n_kin_params,))

    self.x_names = self.x_names + [
        name for ix, name in enumerate(self.pypesto_subproblem.x_names)
        if not name.startswith(MODEL_FEATURE_PREFIX)
        and ix in self.pypesto_subproblem.x_free_indices
    ]

    # assemble input to model theano op
    encoded_pars = self.encode_params(self.encoder_pars)
    self.model_pars = tt.concatenate([
        self.kin_pars,
        tt.reshape(encoded_pars, (self.n_model_inputs * self.n_samples,))],
        axis=0
    )
def local_gpua_specifyShape(node, context_name):
    if isinstance(node.inputs[0].type, GpuArrayType):
        return
    inp = [GpuFromHost(context_name)(node.inputs[0])] + node.inputs[1:]
    return tensor.specify_shape(*inp)
def test_specify_shape(self):
    dtype = self.dtype
    if dtype is None:
        dtype = theano.config.floatX

    rng = np.random.RandomState(utt.fetch_seed())
    x1_1 = np.asarray(rng.uniform(1, 2, [4, 2]), dtype=dtype)
    x1_1 = self.cast_value(x1_1)
    x1_2 = np.asarray(rng.uniform(1, 2, [4, 2]), dtype=dtype)
    x1_2 = self.cast_value(x1_2)
    x2 = np.asarray(rng.uniform(1, 2, [4, 3]), dtype=dtype)
    x2 = self.cast_value(x2)

    # Test that we can replace with values of the same shape
    x1_shared = self.shared_constructor(x1_1)
    x1_specify_shape = tensor.specify_shape(x1_shared, x1_1.shape)
    x1_shared.set_value(x1_2)
    assert np.allclose(self.ref_fct(x1_shared.get_value(borrow=True)),
                       self.ref_fct(x1_2))
    shape_op_fct = theano.function([], x1_shared.shape)
    topo = shape_op_fct.maker.fgraph.toposort()
    if theano.config.mode != "FAST_COMPILE":
        assert len(topo) == 3
        assert isinstance(topo[0].op, tensor.opt.Shape_i)
        assert isinstance(topo[1].op, tensor.opt.Shape_i)
        assert isinstance(topo[2].op, tensor.opt.MakeVector)

    # Test that we forward the input
    specify_shape_fct = theano.function([], x1_specify_shape)
    assert np.all(self.ref_fct(specify_shape_fct()) == self.ref_fct(x1_2))
    topo_specify = specify_shape_fct.maker.fgraph.toposort()
    assert len(topo_specify) == 2

    # Test that we put the shape info into the graph
    shape_constant_fct = theano.function([], x1_specify_shape.shape)
    assert np.all(shape_constant_fct() == shape_op_fct())
    topo_cst = shape_constant_fct.maker.fgraph.toposort()
    if theano.config.mode != "FAST_COMPILE":
        assert len(topo_cst) == 1
        topo_cst[0].op == theano.compile.function.types.deep_copy_op

    # Test that we can take the grad.
    if theano.sparse.enable_sparse and isinstance(
            x1_specify_shape.type, theano.sparse.SparseType):
        # SparseVariable don't support sum for now.
        assert not hasattr(x1_specify_shape, "sum")
    else:
        shape_grad = tensor.grad(x1_specify_shape.sum(), x1_shared)
        shape_constant_fct_grad = theano.function([], shape_grad)
        # theano.printing.debugprint(shape_constant_fct_grad)
        shape_constant_fct_grad()

    # Test that we can replace with values of a different shape;
    # that will raise an error in some cases, but not all
    specify_shape_fct()
    x1_shared.set_value(x2)
    with pytest.raises(AssertionError):
        specify_shape_fct()

    # No assertion will be raised as the Op is removed from the graph
    # when there is optimization
    if theano.config.mode not in ["FAST_COMPILE", "DebugMode", "DEBUG_MODE"]:
        shape_constant_fct()
    else:
        with pytest.raises(AssertionError):
            shape_constant_fct()
def test_specify_shape_partial(self):
    dtype = self.dtype
    if dtype is None:
        dtype = theano.config.floatX

    rng = np.random.RandomState(utt.fetch_seed())
    x1_1 = np.asarray(rng.uniform(1, 2, [4, 2]), dtype=dtype)
    x1_1 = self.cast_value(x1_1)
    x1_2 = np.asarray(rng.uniform(1, 2, [4, 2]), dtype=dtype)
    x1_2 = self.cast_value(x1_2)
    x2 = np.asarray(rng.uniform(1, 2, [5, 2]), dtype=dtype)
    x2 = self.cast_value(x2)

    # Test that we can replace with values of the same shape
    x1_shared = self.shared_constructor(x1_1)
    x1_specify_shape = tensor.specify_shape(
        x1_shared,
        (tensor.as_tensor_variable(x1_1.shape[0]), x1_shared.shape[1]),
    )
    x1_shared.set_value(x1_2)
    assert np.allclose(self.ref_fct(x1_shared.get_value(borrow=True)),
                       self.ref_fct(x1_2))
    shape_op_fct = theano.function([], x1_shared.shape)
    topo = shape_op_fct.maker.fgraph.toposort()
    shape_op_fct()
    if theano.config.mode != "FAST_COMPILE":
        assert len(topo) == 3
        assert isinstance(topo[0].op, tensor.opt.Shape_i)
        assert isinstance(topo[1].op, tensor.opt.Shape_i)
        assert isinstance(topo[2].op, tensor.opt.MakeVector)

    # Test that we forward the input
    specify_shape_fct = theano.function([], x1_specify_shape)
    specify_shape_fct()
    # theano.printing.debugprint(specify_shape_fct)
    assert np.all(self.ref_fct(specify_shape_fct()) == self.ref_fct(x1_2))
    topo_specify = specify_shape_fct.maker.fgraph.toposort()
    if theano.config.mode != "FAST_COMPILE":
        assert len(topo_specify) == 4

    # Test that we put the shape info into the graph
    shape_constant_fct = theano.function([], x1_specify_shape.shape)
    # theano.printing.debugprint(shape_constant_fct)
    assert np.all(shape_constant_fct() == shape_op_fct())
    topo_cst = shape_constant_fct.maker.fgraph.toposort()
    if theano.config.mode != "FAST_COMPILE":
        assert len(topo_cst) == 2

    # Test that we can replace with values of a different shape;
    # that will raise an error in some cases, but not all
    x1_shared.set_value(x2)
    with pytest.raises(AssertionError):
        specify_shape_fct()

    # No assertion will be raised as the Op is removed from the graph
    if theano.config.mode not in ["FAST_COMPILE", "DebugMode", "DEBUG_MODE"]:
        shape_constant_fct()
    else:
        with pytest.raises(AssertionError):
            shape_constant_fct()
def test_specify_shape(self):
    dtype = self.dtype
    if dtype is None:
        dtype = theano.config.floatX

    rng = np.random.RandomState(utt.fetch_seed())
    x1_1 = np.asarray(rng.uniform(1, 2, [4, 2]), dtype=dtype)
    x1_1 = self.cast_value(x1_1)
    x1_2 = np.asarray(rng.uniform(1, 2, [4, 2]), dtype=dtype)
    x1_2 = self.cast_value(x1_2)
    x2 = np.asarray(rng.uniform(1, 2, [4, 3]), dtype=dtype)
    x2 = self.cast_value(x2)

    # Test that we can replace with values of the same shape
    x1_shared = self.shared_constructor(x1_1)
    x1_specify_shape = tensor.specify_shape(x1_shared, x1_1.shape)
    x1_shared.set_value(x1_2)
    assert np.allclose(self.ref_fct(x1_shared.get_value(borrow=True)),
                       self.ref_fct(x1_2))
    shape_op_fct = theano.function([], x1_shared.shape)
    topo = shape_op_fct.maker.fgraph.toposort()
    if theano.config.mode != 'FAST_COMPILE':
        assert len(topo) == 3
        assert isinstance(topo[0].op, tensor.opt.Shape_i)
        assert isinstance(topo[1].op, tensor.opt.Shape_i)
        assert isinstance(topo[2].op, tensor.opt.MakeVector)

    # Test that we forward the input
    specify_shape_fct = theano.function([], x1_specify_shape)
    assert np.all(self.ref_fct(specify_shape_fct()) == self.ref_fct(x1_2))
    topo_specify = specify_shape_fct.maker.fgraph.toposort()
    assert len(topo_specify) == 2

    # Test that we put the shape info into the graph
    shape_constant_fct = theano.function([], x1_specify_shape.shape)
    assert np.all(shape_constant_fct() == shape_op_fct())
    topo_cst = shape_constant_fct.maker.fgraph.toposort()
    if theano.config.mode != 'FAST_COMPILE':
        assert len(topo_cst) == 1
        topo_cst[0].op == theano.compile.function_module.deep_copy_op

    # Test that we can take the grad.
    if (theano.sparse.enable_sparse and
            isinstance(x1_specify_shape.type, theano.sparse.SparseType)):
        # SparseVariable don't support sum for now.
        assert not hasattr(x1_specify_shape, 'sum')
    else:
        shape_grad = tensor.grad(x1_specify_shape.sum(), x1_shared)
        shape_constant_fct_grad = theano.function([], shape_grad)
        # theano.printing.debugprint(shape_constant_fct_grad)
        shape_constant_fct_grad()

    # Test that we can replace with values of a different shape;
    # that will raise an error in some cases, but not all
    specify_shape_fct()
    x1_shared.set_value(x2)
    self.assertRaises(AssertionError, specify_shape_fct)

    # No assertion will be raised as the Op is removed from the graph
    # when there is optimization
    if theano.config.mode not in ['FAST_COMPILE', 'DebugMode', 'DEBUG_MODE']:
        shape_constant_fct()
    else:
        self.assertRaises(AssertionError, shape_constant_fct)
x = tt.matrix('x')
f = theano.function([x], (x**2).shape)
theano.printing.debugprint(f)

print("\n")

import numpy

x = tt.matrix('x')
y = tt.matrix('y')
z = tt.join(0, x, y)
xv = numpy.random.rand(5, 4)
yv = numpy.random.rand(3, 3)
f = theano.function([x, y], z.shape)
theano.printing.debugprint(f)

print("\n")

f1 = f(xv, yv)
theano.printing.debugprint(f1)

print("\n")

f1 = theano.function([x, y], z)  # Do not take the shape.
theano.printing.debugprint(f1)

print("\n")

x = tt.matrix()
x_specify_shape = tt.specify_shape(x, (2, 2))
f = theano.function([x], (x_specify_shape**2).shape)
theano.printing.debugprint(f)

print("\n")
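# A small follow-on sketch (assuming the imports used above, with theano.tensor
# as tt): compiling on the values instead of the shape keeps the SpecifyShape
# check in the graph, so a mismatched input fails at run time. The name `g` is
# illustrative.
g = theano.function([x], x_specify_shape ** 2)
print(g(numpy.ones((2, 2), dtype=x.dtype)))   # OK: matches the declared (2, 2) shape
try:
    g(numpy.ones((3, 3), dtype=x.dtype))      # violates the declared shape
except Exception as err:
    print("SpecifyShape caught a bad input:", err)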