def test_density_scaling_with_generator(self):
    # We have different size generators

    def true_dens():
        g = gen1()
        for i, point in enumerate(g):
            yield stats.norm.logpdf(point).sum() * 10

    t = true_dens()
    # We have same size models
    with pm.Model() as model1:
        Normal('n', observed=gen1(), total_size=100)
        p1 = theano.function([], model1.logpt)

    with pm.Model() as model2:
        gen_var = generator(gen2())
        Normal('n', observed=gen_var, total_size=100)
        p2 = theano.function([], model2.logpt)

    for i in range(10):
        _1, _2, _t = p1(), p2(), next(t)
        np.testing.assert_almost_equal(
            _1, _t,
            decimal=select_by_precision(float64=7, float32=2))  # Value O(-50,000)
        np.testing.assert_almost_equal(_1, _2)
def test_density_scaling_with_generator(self):
    # We have different size generators

    def gen1():
        i = 0
        while True:
            yield np.ones((10, 100)) * i
            i += 1

    def gen2():
        i = 0
        while True:
            yield np.ones((20, 100)) * i
            i += 1

    # We have same size models
    with pm.Model() as model1:
        Normal('n', observed=gen1(), total_size=100)
        p1 = theano.function([], model1.logpt)

    with pm.Model() as model2:
        gen_var = generator(gen2())
        Normal('n', observed=gen_var, total_size=100)
        p2 = theano.function([], model2.logpt)

    # We want densities to be equal
    for _ in range(10):
        np.testing.assert_almost_equal(p1(), p2())
def test_density_scaling(self):
    with pm.Model() as model1:
        Normal('n', observed=[[1]], total_size=1)
        p1 = theano.function([], model1.logpt)

    with pm.Model() as model2:
        Normal('n', observed=[[1]], total_size=2)
        p2 = theano.function([], model2.logpt)

    self.assertEqual(p1() * 2, p2())
def test_density_scaling(self):
    with pm.Model() as model1:
        Normal('n', observed=[[1]], total_size=1)
        p1 = theano.function([], model1.logpt)

    with pm.Model() as model2:
        Normal('n', observed=[[1]], total_size=2)
        p2 = theano.function([], model2.logpt)

    assert p1() * 2 == p2()
def test_cloning_available(self):
    gop = generator(integers())
    res = gop ** 2
    shared = theano.shared(np.float32(10))
    res1 = theano.clone(res, {gop: shared})
    f = theano.function([], res1)
    assert f() == np.float32(100)
def test_take_along_axis_grad(self, shape, axis, samples):
    if axis < 0:
        _axis = len(shape) + axis
    else:
        _axis = axis
    # Setup the theano function
    t_arr, t_indices = self.get_input_tensors(shape)
    t_out2 = theano.grad(
        tt.sum(self._output_tensor(t_arr ** 2, t_indices, axis)),
        t_arr,
    )
    func = theano.function([t_arr, t_indices], [t_out2])

    # Test that the gradient gives the same output as what is expected
    arr, indices = self.get_input_values(shape, axis, samples)
    expected_grad = np.zeros_like(arr)
    slicer = [slice(None)] * len(shape)
    for i in range(indices.shape[axis]):
        slicer[axis] = i
        # index with a tuple: list-based fancy indexing is rejected by
        # modern numpy
        inds = indices[tuple(slicer)].reshape(
            shape[:_axis] + (1,) + shape[_axis + 1:])
        inds = _make_along_axis_idx(shape, inds, _axis)
        expected_grad[inds] += 1
    expected_grad *= 2 * arr
    out = func(arr, indices)[0]
    assert np.allclose(out, expected_grad)
def test_elbo():
    mu0 = 1.5
    sigma = 1.0
    y_obs = np.array([1.6, 1.4])

    post_mu = np.array([1.88], dtype=theano.config.floatX)
    post_sd = np.array([1], dtype=theano.config.floatX)
    # Create a model for test
    with Model() as model:
        mu = Normal('mu', mu=mu0, sd=sigma)
        Normal('y', mu=mu, sd=1, observed=y_obs)

    # Create variational gradient tensor
    mean_field = MeanField(model=model)
    elbo = -KL(mean_field)()(mean_field.random())

    mean_field.shared_params['mu'].set_value(post_mu)
    mean_field.shared_params['rho'].set_value(np.log(np.exp(post_sd) - 1))

    f = theano.function([], elbo)
    elbo_mc = sum(f() for _ in range(10000)) / 10000

    # Exact value
    elbo_true = (-0.5 * (
        3 + 3 * post_mu ** 2 - 2 * (y_obs[0] + y_obs[1] + mu0) * post_mu +
        y_obs[0] ** 2 + y_obs[1] ** 2 + mu0 ** 2 + 3 * np.log(2 * np.pi)) +
        0.5 * (np.log(2 * np.pi) + 1))
    np.testing.assert_allclose(elbo_mc, elbo_true, rtol=0, atol=1e-1)
def test_elbo():
    mu0 = 1.5
    sigma = 1.0
    y_obs = np.array([1.6, 1.4])

    post_mu = np.array([1.88], dtype=theano.config.floatX)
    post_sd = np.array([1], dtype=theano.config.floatX)
    # Create a model for test
    with pm.Model() as model:
        mu = pm.Normal('mu', mu=mu0, sd=sigma)
        pm.Normal('y', mu=mu, sd=1, observed=y_obs)

    # Create variational gradient tensor
    mean_field = MeanField(model=model)
    with pm.theanof.change_flags(compute_test_value='off'):
        elbo = -pm.operators.KL(mean_field)()(10000)

    mean_field.shared_params['mu'].set_value(post_mu)
    mean_field.shared_params['rho'].set_value(np.log(np.exp(post_sd) - 1))

    f = theano.function([], elbo)
    elbo_mc = f()

    # Exact value
    elbo_true = (-0.5 * (
        3 + 3 * post_mu ** 2 - 2 * (y_obs[0] + y_obs[1] + mu0) * post_mu +
        y_obs[0] ** 2 + y_obs[1] ** 2 + mu0 ** 2 + 3 * np.log(2 * np.pi)) +
        0.5 * (np.log(2 * np.pi) + 1))
    np.testing.assert_allclose(elbo_mc, elbo_true, rtol=0, atol=1e-1)
def test_elbo():
    mu0 = 1.5
    sigma = 1.0
    y_obs = np.array([1.6, 1.4])

    post_mu = np.array([1.88], dtype=theano.config.floatX)
    post_sd = np.array([1], dtype=theano.config.floatX)
    # Create a model for test
    with Model() as model:
        mu = Normal('mu', mu=mu0, sd=sigma)
        Normal('y', mu=mu, sd=1, observed=y_obs)

    # Create variational gradient tensor
    mean_field = MeanField(model=model)
    elbo = -KL(mean_field)()(10000)

    mean_field.shared_params['mu'].set_value(post_mu)
    mean_field.shared_params['rho'].set_value(np.log(np.exp(post_sd) - 1))

    f = theano.function([], elbo)
    elbo_mc = f()

    # Exact value
    elbo_true = (-0.5 * (
        3 + 3 * post_mu ** 2 - 2 * (y_obs[0] + y_obs[1] + mu0) * post_mu +
        y_obs[0] ** 2 + y_obs[1] ** 2 + mu0 ** 2 + 3 * np.log(2 * np.pi)) +
        0.5 * (np.log(2 * np.pi) + 1))
    np.testing.assert_allclose(elbo_mc, elbo_true, rtol=0, atol=1e-1)
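# For reference, a sketch of the algebra behind `elbo_true` in the test_elbo
# variants above (my own derivation, not part of the test source). With prior
# mu ~ N(mu0, 1), likelihood y_i ~ N(mu, 1) for i = 1, 2, and approximate
# posterior q(mu) = N(m, s^2), where m = post_mu and s = post_sd:
#
#   E_q[(y_i - mu)^2] = (y_i - m)^2 + s^2
#   E_q[log p(y, mu)] = -(3/2) log(2 pi)
#                       - (1/2) (3 s^2 + 3 m^2 - 2 (y_1 + y_2 + mu0) m
#                                + y_1^2 + y_2^2 + mu0^2)
#   H[q] = (1/2) (log(2 pi) + 1) + log(s)
#
# ELBO = E_q[log p(y, mu)] + H[q]; substituting s = 1 gives exactly the
# `elbo_true` expression used above.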
def test_gradient_with_scaling(self):
    with pm.Model() as model1:
        genvar = generator(gen1())
        m = Normal('m')
        Normal('n', observed=genvar, total_size=1000)
        grad1 = theano.function([m], tt.grad(model1.logpt, m))

    with pm.Model() as model2:
        m = Normal('m')
        shavar = theano.shared(np.ones((1000, 100)))
        Normal('n', observed=shavar)
        grad2 = theano.function([m], tt.grad(model2.logpt, m))

    for i in range(10):
        shavar.set_value(np.ones((100, 100)) * i)
        g1 = grad1(1)
        g2 = grad2(1)
        np.testing.assert_almost_equal(g1, g2)
def compile(self, parameters, x, mask, y, cost):
    weight_decay = ((self.w ** 2).sum()) * self.decay
    cost += weight_decay
    gradients = [T.grad(cost, param) for param in self.params]
    updates = [(p, p - self.lr * g) for p, g in zip(parameters, gradients)]
    train = theano.function([x, mask, y, self.lr], cost, updates=updates)
    return train
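# A minimal self-contained sketch (my own example, not from the snippet
# above) of the same pattern: the learning rate is a symbolic input to
# theano.function, so it can be changed on every call.
import numpy as np
import theano
import theano.tensor as T

w = theano.shared(np.float64(0.0), name='w')
x = T.dscalar('x')
lr = T.dscalar('lr')
cost = (w * x - 1.0) ** 2
grad = T.grad(cost, w)
train = theano.function([x, lr], cost, updates=[(w, w - lr * grad)])

for _ in range(100):
    train(2.0, 0.05)      # learning rate supplied at call time
print(w.get_value())      # converges towards 0.5, where w * 2 == 1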
def __init__(self, cost, grad_vars, extra_vars=None, dtype=None,
             casting='no', **kwargs):
    if extra_vars is None:
        extra_vars = []

    names = [arg.name for arg in grad_vars + extra_vars]
    if any(name is None for name in names):
        raise ValueError('Arguments must be named.')
    if len(set(names)) != len(names):
        raise ValueError('Names of the arguments are not unique.')

    if cost.ndim > 0:
        raise ValueError('Cost must be a scalar.')

    self._grad_vars = grad_vars
    self._extra_vars = extra_vars
    self._extra_var_names = set(var.name for var in extra_vars)
    self._cost = cost
    self._ordering = ArrayOrdering(grad_vars)
    self.size = self._ordering.size
    self._extra_are_set = False
    if dtype is None:
        dtype = theano.config.floatX
    self.dtype = dtype
    for var in self._grad_vars:
        if not np.can_cast(var.dtype, self.dtype, casting):
            raise TypeError('Invalid dtype for variable %s. Can not '
                            'cast to %s with casting rule %s.'
                            % (var.name, self.dtype, casting))
        if not np.issubdtype(var.dtype, float):
            raise TypeError('Invalid dtype for variable %s. Must be '
                            'floating point but is %s.'
                            % (var.name, var.dtype))

    givens = []
    self._extra_vars_shared = {}
    for var in extra_vars:
        shared = theano.shared(var.tag.test_value, var.name + '_shared__')
        self._extra_vars_shared[var.name] = shared
        givens.append((var, shared))

    self._vars_joined, self._cost_joined = self._build_joined(
        self._cost, grad_vars, self._ordering.vmap)

    grad = tt.grad(self._cost_joined, self._vars_joined)
    grad.name = '__grad'

    inputs = [self._vars_joined]

    self._theano_function = theano.function(
        inputs, [self._cost_joined, grad], givens=givens, **kwargs)
def test_ndim(self):
    for ndim in range(10):
        res = list(itertools.islice(integers_ndim(ndim), 0, 2))
        generator = GeneratorAdapter(integers_ndim(ndim))
        gop = GeneratorOp(generator)()
        f = theano.function([], gop)
        assert ndim == res[0].ndim
        np.testing.assert_equal(f(), res[0])
        np.testing.assert_equal(f(), res[1])
def test_basic(self):
    generator = GeneratorAdapter(integers())
    gop = GeneratorOp(generator)()
    assert gop.tag.test_value == np.float32(0)
    f = theano.function([], gop)
    assert f() == np.float32(0)
    assert f() == np.float32(1)
    for _ in range(2, 100):
        f()
    assert f() == np.float32(100)
def test_default_value(self):
    def gen():
        for i in range(2):
            yield np.ones((10, 10)) * i

    gop = generator(gen(), np.ones((10, 10)) * 10)
    f = theano.function([], gop)
    np.testing.assert_equal(np.ones((10, 10)) * 0, f())
    np.testing.assert_equal(np.ones((10, 10)) * 1, f())
    np.testing.assert_equal(np.ones((10, 10)) * 10, f())
    with pytest.raises(ValueError):
        gop.set_default(1)
def compile(self, params, x1, x2, y, cost):
    gradients = [T.grad(cost, param) for param in params]
    zipped_grads = [
        theano.shared(p.get_value() * numpy_floatX(0.), name='%s_grad' % p)
        for p in params
    ]
    running_up2 = [
        theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rup2' % p)
        for p in params
    ]
    running_grads2 = [
        theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rgrad2' % p)
        for p in params
    ]

    zgup = [(zg, g) for zg, g in zip(zipped_grads, gradients)]
    rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2))
             for rg2, g in zip(running_grads2, gradients)]

    train = theano.function([x1, x2, y], cost,
                            updates=zgup + rg2up,
                            name='adadelta_train')

    updir = [
        -T.sqrt(ru2 + 1e-6) / T.sqrt(rg2 + 1e-6) * zg
        for zg, ru2, rg2 in zip(zipped_grads, running_up2, running_grads2)
    ]
    ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2))
             for ru2, ud in zip(running_up2, updir)]
    param_up = [(p, p + ud) for p, ud in zip(params, updir)]

    update = theano.function([], [],
                             updates=ru2up + param_up,
                             on_unused_input='ignore',
                             name='adadelta_update')
    return train, update
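# Usage sketch for the Adadelta `compile` above (hypothetical `opt` instance
# and minibatch iterable; not from the original source). The two returned
# functions are meant to be called in order on every minibatch: `train`
# refreshes the running squared-gradient accumulators and returns the cost,
# then `update` computes the update directions and applies the parameter step.
#
#   train, update = opt.compile(params, x1, x2, y, cost)
#   for x1_batch, x2_batch, y_batch in minibatches:
#       batch_cost = train(x1_batch, x2_batch, y_batch)
#       update()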
def test_density_scaling_with_generator(self):
    # We have different size generators

    def true_dens():
        g = gen1()
        for i, point in enumerate(g):
            yield stats.norm.logpdf(point).sum() * 10

    t = true_dens()
    # We have same size models
    with pm.Model() as model1:
        Normal('n', observed=gen1(), total_size=100)
        p1 = theano.function([], model1.logpt)

    with pm.Model() as model2:
        gen_var = generator(gen2())
        Normal('n', observed=gen_var, total_size=100)
        p2 = theano.function([], model2.logpt)

    for i in range(10):
        _1, _2, _t = p1(), p2(), next(t)
        np.testing.assert_almost_equal(_1, _t)
        np.testing.assert_almost_equal(_1, _2)
def encode_images(images):
    """Encodes images using model saved to disk earlier."""
    with open('model.json', 'r') as json_file:
        loaded_model_json = json_file.read()
    model = model_from_json(loaded_model_json)
    model.load_weights('model.h5')
    model.compile(optimizer='adadelta', loss='binary_crossentropy')
    get_activations = theano.function([model.layers[0].input],
                                      model.layers[3].output,
                                      allow_input_downcast=True)
    data = get_activations(images)
    np.save('encoded_imgs.npy', data)
def test_set_gen_and_exc(self):
    def gen():
        for i in range(2):
            yield np.ones((10, 10)) * i

    gop = generator(gen())
    f = theano.function([], gop)
    np.testing.assert_equal(np.ones((10, 10)) * 0, f())
    np.testing.assert_equal(np.ones((10, 10)) * 1, f())
    with pytest.raises(StopIteration):
        f()
    gop.set_gen(gen())
    np.testing.assert_equal(np.ones((10, 10)) * 0, f())
    np.testing.assert_equal(np.ones((10, 10)) * 1, f())
def abstractfit(model, X_train, y):
    nfeat = X_train.shape[1]
    model.configs['nvis'] = nfeat
    dataset = model.dataset_adaptor.fit_transform(X_train, y)
    super(type(model), model).__init__(**model.configs)
    trainer = model.trainer.get_trainer(model, dataset)
    trainer.main_loop()

    # define estimator
    X = tensor.matrix()
    # ff = theano.function([X], model.encode(X),
    #                      compile.mode.Mode(linker='py', optimizer='fast_compile'))
    ff = theano.function([X], model.fprop(X))
    # print ff(X_train)
    model.estimator = ff
    return model
def makefn(self, outs, mode=None, *args, **kwargs):
    """Compiles a Theano function which returns `outs` and takes the
    variable ancestors of `outs` as inputs.

    Parameters
    ----------
    outs : Theano variable or iterable of Theano variables
    mode : Theano compilation mode

    Returns
    -------
    Compiled Theano function
    """
    return theano.function(self.vars, outs,
                           allow_input_downcast=True,
                           on_unused_input='ignore',
                           accept_inplace=True,
                           mode=mode, *args, **kwargs)
def makefn(self, outs, mode=None, *args, **kwargs):
    """Compiles a Theano function which returns `outs` and takes the
    variable ancestors of `outs` as inputs.

    Parameters
    ----------
    outs : Theano variable or iterable of Theano variables
    mode : Theano compilation mode

    Returns
    -------
    Compiled Theano function
    """
    with self:
        return theano.function(self.vars, outs,
                               allow_input_downcast=True,
                               on_unused_input='ignore',
                               accept_inplace=True,
                               mode=mode, *args, **kwargs)
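# A minimal usage sketch for `makefn` (assuming the PyMC3 Model API used
# elsewhere in this section; the variable name is my own). The compiled
# function takes the model's free variables as named inputs:
#
#   with pm.Model() as model:
#       pm.Normal('a', mu=0, sd=1)
#   logp = model.makefn(model.logpt)
#   print(logp(a=0.0))  # model log-density evaluated at a = 0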
# (fragment: tail of GpuAXPBOp.c_code, whose body is a C template string)
        PyErr_Format(PyExc_RuntimeError,
                     "gpuarray error: kEye: %%s. n%%lu, m=%%lu.",
                     GpuKernel_error(&%(kname)s, err),
                     (unsigned long)dims[0], (unsigned long)dims[1]);
        %(fail)s;
    }
    if(%(sync)d)
        GpuArray_sync(&%(z)s->ga);
""" % locals()
        return s

    def c_code_cache_version(self):
        return (21, 4)


mult4plus5op = GpuAXPBOp(4, 5)

x = theano.tensor.matrix('x')
z = mult4plus5op(x)
theano.printing.debugprint(z)
print("Compiling")
f = theano.function([x], z)
theano.printing.debugprint(f)
print("Eval")
ind = numpy.random.rand(3, 2).astype(theano.config.floatX)
print("Equality", numpy.allclose(f(ind), 2 * ind))
print(mult4plus5op)
def __init__(self, x, y, batch_size, videos, kernels, pools, n_input,
             n_output, hidden_input, params=None):
    learning_rate = 0.1
    rng = numpy.random.RandomState(1234)

    print('... building the model')
    sys.stdout.flush()

    if not params:
        # Construct the first convolutional pooling layer:
        # filtering reduces the image size to (28-5+1,28-5+1)=(24,24)
        # maxpooling reduces this further to (24/2,24/2) = (12,12)
        # 4D output tensor is thus of shape (batch_size,nkerns[0],12,12)
        layer0 = ConvLayer(x, n_input[0], n_output[0], kernels[0],
                           videos[0], pools[0], batch_size, 'L0', rng)
        layer1 = ConvLayer(layer0.output, n_input[1], n_output[1],
                           kernels[1], videos[1], pools[1], batch_size,
                           'L1', rng)
        layer2_input = layer1.output.flatten(2)
        # construct a fully-connected sigmoidal layer
        layer2 = HiddenLayer(rng, input=layer2_input, n_in=hidden_input,
                             n_out=batch_size, activation=T.tanh)
        # classify the values of the fully-connected sigmoidal layer
        layer3 = LogisticRegression(input=layer2.output, n_in=batch_size,
                                    n_out=2)
    else:
        layer0 = ConvLayer(x, n_input[0], n_output[0], kernels[0],
                           videos[0], pools[0], batch_size, 'L0', rng,
                           True, params[6], params[7])
        layer1 = ConvLayer(layer0.output, n_input[1], n_output[1],
                           kernels[1], videos[1], pools[1], batch_size,
                           'L1', rng, True, params[4], params[5])
        layer2_input = layer1.output.flatten(2)
        # construct a fully-connected sigmoidal layer
        layer2 = HiddenLayer(rng, input=layer2_input, n_in=hidden_input,
                             n_out=batch_size, activation=T.tanh,
                             W=params[2], b=params[3])
        # classify the values of the fully-connected sigmoidal layer
        layer3 = LogisticRegression(input=layer2.output, n_in=batch_size,
                                    n_out=2, W=params[0], b=params[1])

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a list of all model parameters to be fit by gradient descent
    self.params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, self.params)

    # train_model is a function that updates the model parameters by SGD.
    # Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = []
    for param_i, grad_i in zip(self.params, grads):
        updates.append((param_i, param_i - learning_rate * grad_i))

    self.train_model = theano.function([x, y], cost, updates=updates)
    self.validate_model = theano.function(inputs=[x, y],
                                          outputs=layer3.errors(y))
    self.predict = theano.function(inputs=[x], outputs=layer3.y_pred)

    print('... building done')
    sys.stdout.flush()
def fit(values, data_set, params):
    model = tied_dropout_iterative_model
    OutputLog().write('Model: {0}'.format(model.__name__))

    if len(params) == 1:
        update_param(params[0], values)
    else:
        for value, param in zip(values, params):
            update_param(param, value)

    model_x, model_y, hidden_x, hidden_y, loss, outputs, hooks = model.build_model(
        x_var, data_set.trainset[0].shape[1],
        y_var, data_set.trainset[1].shape[1],
        layer_sizes=Params.LAYER_SIZES,
        parallel_width=Params.PARALLEL_WIDTH,
        drop_prob=Params.DROPOUT,
        weight_init=Params.WEIGHT_INIT)

    params_x = lasagne.layers.get_all_params(model_x, trainable=True)
    params_y = lasagne.layers.get_all_params(model_y, trainable=True)

    updates = OrderedDict(batchnormalizeupdates(hooks, 100))

    params_x.extend(params_y)
    params = lasagne.utils.unique(params_x)

    current_learning_rate = Params.BASE_LEARNING_RATE
    updates.update(lasagne.updates.momentum(loss, params,
                                            learning_rate=current_learning_rate,
                                            momentum=Params.MOMENTUM))

    train_fn = theano.function([x_var, y_var],
                               [loss] + list(outputs.values()),
                               updates=updates)

    test_y = theano.function([x_var, y_var],
                             [lasagne.layers.get_output(layer,
                                                        moving_avg_hooks=hooks,
                                                        deterministic=True)
                              for layer in hidden_x],
                             on_unused_input='ignore')

    test_x = theano.function([x_var, y_var],
                             [lasagne.layers.get_output(layer,
                                                        moving_avg_hooks=hooks,
                                                        deterministic=True)
                              for layer in hidden_y],
                             on_unused_input='ignore')

    batch_number = data_set.trainset[0].shape[0] // Params.BATCH_SIZE

    output_string = '{0}/{1} loss: {2} '
    output_string += ' '.join(['{0}:{{{1}}}'.format(key, index + 3)
                               for index, key in enumerate(outputs.keys())])

    for epoch in range(Params.EPOCH_NUMBER):
        OutputLog().write('Epoch {0}'.format(epoch))
        for index, batch in enumerate(
                iterate_minibatches(data_set.trainset[0], data_set.trainset[1],
                                    Params.BATCH_SIZE, True)):
            input_x, input_y = batch
            train_loss = train_fn(input_x, input_y)
            OutputLog().write(output_string.format(index, batch_number, *train_loss))

    x_values = test_y(data_set.tuning[0], data_set.tuning[1])
    y_values = test_x(data_set.tuning[0], data_set.tuning[1])

    OutputLog().write('\nValidating model\n')

    for index, (x, y) in enumerate(zip(x_values, y_values)):
        search_recall, describe_recall = complete_rank(x, y, data_set.reduce_val)
        validation_loss = calculate_reconstruction_error(x, y)
        correlation = calculate_mardia(x, y, 0)
        OutputLog().write('Layer {0} - loss: {1}, correlation: {2}, recall: {3}'.format(
            index, validation_loss, correlation,
            sum(search_recall) + sum(describe_recall)))

    return sum(search_recall) + sum(describe_recall)
cost = (-T.mean(T.log(model)[T.arange(y.shape[0]), y])
        + 0.0001 * (W1 ** 2).sum() + 0.0001 * (W2 ** 2).sum())
g_params = T.grad(cost=cost, wrt=params)

learning_rate = 0.01
updates = [(param, param - learning_rate * gparam)
           for param, gparam in zip(params, g_params)]

index = T.lscalar()
train_model = theano.function(
    inputs=[index],
    outputs=[cost, error],
    updates=updates,
    givens={
        x: train_set_x[index * batch_size:(index + 1) * batch_size],
        y: train_set_y[index * batch_size:(index + 1) * batch_size]
    })
validate_model = theano.function(inputs=[x, y], outputs=[cost, error])

print("Training")
n_epochs = 1000
n_train_batches = train_set[0].shape[0] // batch_size
n_iters = n_epochs * n_train_batches
train_loss = numpy.zeros(n_iters)
train_error = numpy.zeros(n_iters)
else:
    updates = OrderedDict()

params_x.extend(params_y)
params = lasagne.utils.unique(params_x)

current_learning_rate = Params.BASE_LEARNING_RATE
updates.update(lasagne.updates.nesterov_momentum(loss, params,
                                                 learning_rate=current_learning_rate,
                                                 momentum=Params.MOMENTUM))

train_fn = theano.function([x_var, y_var],
                           [loss] + list(outputs.values()),
                           updates=updates)

inference_model_y = theano.function(
    [x_var],
    [lasagne.layers.get_output(layer, moving_avg_hooks=hooks, deterministic=True)
     for layer in hidden_x],
    on_unused_input='ignore')

inference_model_x = theano.function(
    [y_var],
    [lasagne.layers.get_output(layer, moving_avg_hooks=hooks, deterministic=True)
     for layer in hidden_y],
    on_unused_input='ignore')

batch_number = data_set.trainset[0].shape[0] // Params.BATCH_SIZE
def run_experiment(experiment_values, data_parameters, path):
    id = uuid.uuid4()
    OutputLog().set_output_path(path, suffix=str(id))

    top = 0
    param_backup = copy.deepcopy(Params.__dict__)
    update_param(experiment_values)

    y_var = tensor.fmatrix()
    x_var = tensor.fmatrix()

    # construct data set
    data_set = Container().create(data_parameters['name'], data_parameters)
    data_set.load()

    model_results = {'train': [], 'validate': []}

    model = tied_dropout_iterative_model

    model_x, model_y, hidden_x, hidden_y, loss, outputs, hooks = model.build_model(
        x_var, data_set.trainset[0].shape[1],
        y_var, data_set.trainset[1].shape[1],
        layer_sizes=Params.LAYER_SIZES,
        parallel_width=Params.PARALLEL_WIDTH,
        drop_prob=Params.DROPOUT,
        weight_init=Params.WEIGHT_INIT)

    params_x = lasagne.layers.get_all_params(model_x, trainable=True)
    params_y = lasagne.layers.get_all_params(model_y, trainable=True)

    if hooks:
        updates = OrderedDict(batchnormalizeupdates(hooks, 100))
    else:
        updates = OrderedDict()

    current_learning_rate = Params.BASE_LEARNING_RATE

    params_x.extend(params_y)
    params = lasagne.utils.unique(params_x)

    updates.update(lasagne.updates.momentum(loss, params,
                                            learning_rate=current_learning_rate,
                                            momentum=Params.MOMENTUM))

    train_fn = theano.function([x_var, y_var],
                               [loss] + list(outputs.values()),
                               updates=updates)

    test_y = theano.function([x_var, y_var],
                             [lasagne.layers.get_output(layer,
                                                        moving_avg_hooks=hooks,
                                                        deterministic=True)
                              for layer in hidden_x],
                             on_unused_input='ignore')

    test_x = theano.function([x_var, y_var],
                             [lasagne.layers.get_output(layer,
                                                        moving_avg_hooks=hooks,
                                                        deterministic=True)
                              for layer in hidden_y],
                             on_unused_input='ignore')

    batch_number = data_set.trainset[0].shape[0] // Params.BATCH_SIZE

    output_string = '{0}/{1} loss: {2} '
    output_string += ' '.join(['{0}:{{{1}}}'.format(key, index + 3)
                               for index, key in enumerate(outputs.keys())])

    for epoch in range(Params.EPOCH_NUMBER):
        OutputLog().write('Epoch {0}'.format(epoch))

        model_results['train'].append({'loss': []})
        model_results['validate'].append({})

        for label in outputs.keys():
            model_results['train'][epoch][label] = []

        for index, batch in enumerate(
                iterate_minibatches(data_set.trainset[0], data_set.trainset[1],
                                    Params.BATCH_SIZE, True)):
            input_x, input_y = batch
            train_loss = train_fn(numpy.cast[theano.config.floatX](input_x),
                                  numpy.cast[theano.config.floatX](input_y))

            model_results['train'][epoch]['loss'].append(train_loss[0])
            for label, value in zip(outputs.keys(), train_loss[1:]):
                model_results['train'][epoch][label].append(value)

            OutputLog().write(output_string.format(index, batch_number, *train_loss))

        if Params.CROSS_VALIDATION:
            x_values = test_y(data_set.tuning[0], data_set.tuning[1])
            y_values = test_x(data_set.tuning[0], data_set.tuning[1])

            OutputLog().write('\nValidating model\n')

            if VALIDATE_ALL:
                for index, (x, y) in enumerate(zip(x_values, y_values)):
                    search_recall, describe_recall = complete_rank(x, y, data_set.reduce_val)
                    validation_loss = calculate_reconstruction_error(x, y)
                    correlation = calculate_mardia(x, y, top)
                    OutputLog().write(
                        'Layer {0} - loss: {1}, correlation: {2}, recall: {3}'.format(
                            index, validation_loss, correlation,
                            sum(search_recall) + sum(describe_recall)))
            else:
                middle_x = x_values[Params.TEST_LAYER]
                middle_y = y_values[Params.TEST_LAYER]

                search_recall, describe_recall = complete_rank(middle_x, middle_y,
                                                               data_set.reduce_val)
                validation_loss = calculate_reconstruction_error(middle_x, middle_y)
                correlation = calculate_mardia(middle_x, middle_y, top)

                # note: without the stray trailing commas of the original,
                # which turned each of these into a 1-tuple
                mean_x = numpy.mean(numpy.mean(middle_x, axis=0))
                mean_y = numpy.mean(numpy.mean(middle_y, axis=0))
                var_x = numpy.mean(numpy.var(middle_x, axis=0))
                var_y = numpy.mean(numpy.var(middle_y, axis=0))

                OutputLog().write(
                    'Layer - loss: {1}, correlation: {2}, recall: {3}, '
                    'mean_x: {4}, mean_y: {5}, var_x: {6}, var_y: {7}'.format(
                        index, validation_loss, correlation,
                        sum(search_recall) + sum(describe_recall),
                        mean_x, mean_y, var_x, var_y))

                model_results['validate'][epoch]['loss'] = validation_loss
                model_results['validate'][epoch]['correlation'] = correlation
                model_results['validate'][epoch]['search_recall'] = sum(search_recall)
                model_results['validate'][epoch]['describe_recall'] = sum(describe_recall)
                model_results['validate'][epoch]['mean_x'] = mean_x
                model_results['validate'][epoch]['mean_y'] = mean_y
                model_results['validate'][epoch]['var_x'] = var_x
                model_results['validate'][epoch]['var_y'] = var_y

        if epoch in Params.DECAY_EPOCH:
            current_learning_rate *= Params.DECAY_RATE
            if hooks:
                updates = OrderedDict(batchnormalizeupdates(hooks, 100))
            else:
                updates = OrderedDict()
            updates.update(lasagne.updates.nesterov_momentum(
                loss, params, learning_rate=current_learning_rate, momentum=0.9))
            del train_fn
            train_fn = theano.function([x_var, y_var],
                                       [loss] + list(outputs.values()),
                                       updates=updates)

    model_results['experiment'] = experiment_values

    with open(os.path.join(path, 'results_{0}.p'.format(id)), 'wb') as results_file:
        pickle.dump(model_results, results_file)

    Params.__dict__ = param_backup

    del train_fn
    del test_x
    del test_y
    del model_x
    del model_y

    return model_results
model_y = cPickle.load(open(os.path.join(INPUT_PATH, 'model_y.p'), 'rb'))

x_var = model_x[0].input_var
y_var = model_y[0].input_var

hidden_x = [layer for layer in model_x if isinstance(layer, TiedDropoutLayer)]
hidden_y = [layer for layer in model_y if isinstance(layer, TiedDropoutLayer)]
hidden_y = list(reversed(hidden_y))

hooks = OrderedDict()

test_y = theano.function(
    [x_var],
    [lasagne.layers.get_output(hidden_x[Params.OUTPUT_LAYER],
                               moving_avg_hooks=hooks,
                               deterministic=True)],
    on_unused_input='ignore')

test_x = theano.function(
    [y_var],
    [lasagne.layers.get_output(hidden_y[Params.OUTPUT_LAYER],
                               moving_avg_hooks=hooks,
                               deterministic=True)],
    on_unused_input='ignore')

batch_number = data_set.trainset[0].shape[0] // Params.BATCH_SIZE

test_model(test_x, test_y, x_test, y_test,
word_to_index = dict([(w, i) for i, w in enumerate(index_)])

print("Dictionary:")
for i in range(10):
    print(i, "->", index_[i])

embedding_size = len(index_)
print("Embedding size:", embedding_size)
print("Hidden layer dimension:", args.hidden)
print("Model:", args.model)

x = T.ivector()
model, params = getattr(models, args.model).model(x, embedding_size, args.hidden)
y_out = T.argmax(model, axis=-1)

print("Compiling...")
predict_model = theano.function(inputs=[x], outputs=y_out)

print("Loading parameters")
load_params("params_{}_{}_h{}_e{}".format(args.mode, args.model,
                                          args.hidden, args.epochs), params)

print("Predicting", args.predicts, "sentences")
for i in range(args.predicts):
    sentence = [0]
    words = input("Type a few words:")  # raw_input in the Python 2 original
    print(words)
    indices = [word_to_index[word]
               for word in nltk.word_tokenize(words.lower())]
    sentence = sentence + indices
def _function(self, arr, indices, out):
    return theano.function([arr, indices], [out])
if hooks:
    updates = OrderedDict(batchnormalizeupdates(hooks, 100))
else:
    updates = OrderedDict()

params_x.extend(params_y)
params = lasagne.utils.unique(params_x)

current_learning_rate = Params.BASE_LEARNING_RATE
updates.update(lasagne.updates.nesterov_momentum(loss, params,
                                                 learning_rate=current_learning_rate,
                                                 momentum=Params.MOMENTUM))

train_fn = theano.function([x_var, y_var],
                           [loss] + list(outputs.values()),
                           updates=updates)

test_y = theano.function([x_var],
                         [lasagne.layers.get_output(layer,
                                                    moving_avg_hooks=hooks,
                                                    deterministic=True)
                          for layer in hidden_x],
                         on_unused_input='ignore')

test_x = theano.function([y_var],
                         [lasagne.layers.get_output(layer,
                                                    moving_avg_hooks=hooks,
                                                    deterministic=True)
                          for layer in hidden_y],
                         on_unused_input='ignore')

batch_number = data_set.trainset[0].shape[0] // Params.BATCH_SIZE

output_string = '{0}/{1} loss: {2} '
output_string += ' '.join(['{0}:{{{1}}}'.format(key, index + 3)
                           for index, key in enumerate(outputs.keys())])
model_x, hidden_x, weights_x, biases_x, prediction_y, hooks_x = model.build_single_channel(
    var_x, data_set.trainset[0].shape[1], data_set.trainset[1].shape[1],
    layer_sizes=layer_sizes, drop_prob=drop_prob, name='x')

loss_y = lasagne.objectives.squared_error(var_y, prediction_y).sum(axis=1).mean()

params_x = lasagne.layers.get_all_params(model_x, trainable=True)

updates = OrderedDict(batchnormalizeupdates(hooks_x, 100))
updates.update(lasagne.updates.nesterov_momentum(loss_y, params_x, 0.001, 0.5))

train_fn_x = theano.function([var_x, var_y], [loss_y], updates=updates)

batch_number = data_set.trainset[0].shape[0] // BATCH_SIZE

for epoch in range(EPOCH_NUMBER):
    OutputLog().write('Epoch {0}'.format(epoch))
    for index, batch in enumerate(
            iterate_minibatches(data_set.trainset[0], data_set.trainset[1],
                                BATCH_SIZE, True)):
        input_x, input_y = batch
        loss = train_fn_x(input_x, input_y)
        OutputLog().write('{0}/{1} loss: {2}'.format(index, batch_number, loss[0]))

model_y, hidden_y, weights_y, biases_y, prediction_x, hooks_y = model.build_single_channel(
    var_y, data_set.trainset[1].shape[1], data_set.trainset[
    input=layer0_input,
    image_shape=(1, 1, 1, window),
    filter_shape=(1, 1, 1, 5),
    poolsize=(1, 2))

# prepare data
xi = []
for i in range(len(x1)):
    starti = i - window + 1
    e = [0 for col in range(-starti)]
    e.extend(x1[max(0, starti):i + 1])
    # print(len(e))
    xi.append(e)
xinumpy = np.array(xi)
xis = theano.shared(name='xi', value=xinumpy.astype(theano.config.floatX))

index = T.iscalar()
action = theano.function([index], [layer0.conv_out],
                         givens={
                             x: xis[index:(index + 1)],
                         })

learning_rate = 0.01
grads = T.grad(cost, layer0.params)
updates = [(param_i, param_i - learning_rate * grad_i)
           for param_i, grad_i in zip(layer0.params, grads)]

v = [action(i) for i in range(lenthX)]
print(x1)
print(v)
print("end")
x = T.ivector()
model, params = getattr(models, args.model).model(x, embedding_size, args.hidden)
y_out = T.argmax(model, axis=-1)
y = T.ivector()
cost = -T.mean(T.log(model)[T.arange(y.shape[0]), y])
g_params = T.grad(cost=cost, wrt=params)

lr = T.scalar('learning_rate')
updates = [(param, param - lr * gparam)
           for param, gparam in zip(params, g_params)]

print("Compiling...")
train_model = theano.function(inputs=[x, y, lr], outputs=cost, updates=updates)

learning_rate = 0.01
n_train = len(y_train)
n_iters = args.epochs * n_train
print("Training:", args.epochs, "epochs of", n_train, "iterations")
train_loss = numpy.zeros(n_iters)

start_time = timeit.default_timer()
for epoch in range(args.epochs):
    for i in range(n_train):
        iteration = i + n_train * epoch
        train_loss[iteration] = train_model(
            numpy.asarray(X_train[i], dtype='int32'),
            numpy.asarray(y_train[i], dtype='int32'),
            learning_rate)
        if (len(train_loss) > 1 and train_loss[-1] > train_loss[-2]):
embeddings_dim = 17
output_voca_size = 134
# n_decodesteps = 100
# nb_passes = 3

input_sentences = T.tensor3("input_sentences", dtype=floatX)
encoder_output = T.tensor3("encoder_output", dtype=floatX)
# output_sentences = T.tensor3("output_sentences", dtype=floatX)

l_in = InputLayer(shape=(batch_size, input_sentence_length, embeddings_dim),
                  input_var=input_sentences)
layer = GRUDecoder(l_in, hidden_dim, attention_dim, output_voca_size)
output_sentences = layer.get_output_for([input_sentences, encoder_output])

fn = theano.function(
    [input_sentences, encoder_output],
    output_sentences,
    # mode='DebugMode',
    on_unused_input='ignore')

np_encoder_output = inputs = np.random.normal(
    size=(batch_size, input_sentence_length, hidden_dim)).astype(floatX)
np_input_sentences = np.random.normal(
    size=(batch_size, input_sentence_length, embeddings_dim)).astype(floatX)

np_output_sentences = fn(np_input_sentences, np_encoder_output)
print(np_output_sentences)
print(np_output_sentences.shape)
model = T.nnet.softmax(T.dot(x, W) + b)
y_pred = T.argmax(model, axis=1)
error = T.mean(T.neq(y_pred, y))
cost = -T.mean(T.log(model)[T.arange(y.shape[0]), y])
g_W = T.grad(cost=cost, wrt=W)
g_b = T.grad(cost=cost, wrt=b)

learning_rate = 0.13
index = T.lscalar()
train_model = theano.function(
    inputs=[index],
    outputs=[cost, error],
    updates=[(W, W - learning_rate * g_W), (b, b - learning_rate * g_b)],
    givens={
        x: train_set_x[index * batch_size:(index + 1) * batch_size],
        y: train_set_y[index * batch_size:(index + 1) * batch_size]
    })
validate_model = theano.function(inputs=[x, y], outputs=[cost, error])

print("Training")
n_epochs = 100
n_train_batches = train_set[0].shape[0] // batch_size
n_iters = n_epochs * n_train_batches
train_loss = numpy.zeros(n_iters)
train_error = numpy.zeros(n_iters)
# construct data set
data_set = Container().create(data_parameters['name'], data_parameters)
data_set.load()

x_var = model_x[0].input_var
y_var = model_y[0].input_var

# Export network
path = OutputLog().output_path

hidden_x = [layer for layer in model_x if isinstance(layer, TiedDropoutLayer)]
hidden_y = [layer for layer in model_y if isinstance(layer, TiedDropoutLayer)]
hidden_y = list(reversed(hidden_y))

test_y = theano.function([x_var, y_var],
                         [lasagne.layers.get_output(layer, deterministic=True)
                          for layer in hidden_x],
                         on_unused_input='ignore')

test_x = theano.function([x_var, y_var],
                         [lasagne.layers.get_output(layer, deterministic=True)
                          for layer in hidden_y],
                         on_unused_input='ignore')

batch_number = data_set.trainset[0].shape[0] // Params.BATCH_SIZE

OutputLog().write('Test results')

# cca = cross_decomposition.CCA(top)

t_x = test_y(data_set.testset[0], data_set.testset[1])
t_y = test_x(data_set.testset[0], data_set.testset[1])
def __init__(self, structure, datasets, activation_function=T.nnet.sigmoid,
             learning_rate=0.1, regression_layer=SumOfSquaredErrors,
             gui_worker=None, normalize=normalize_images):
    """
    Creates a neural net.

    :param structure: list of number of nodes per layer:
        [inputLayer, hiddenLayers..., outputLayer]
    :param datasets: training and test sets as list [training set, test set]
    :param activation_function: e.g. T.nnet.sigmoid
    :param learning_rate: learning rate at each layer except the output layer
    :return:
    """
    if not regression_layer:
        regression_layer = SumOfSquaredErrors
    if not learning_rate:
        learning_rate = 0.1

    self.random_feed = numpy.random.RandomState(23455)
    self.gui_worker = gui_worker
    self.learning_rate = learning_rate
    self.activation_function = activation_function
    self.layers = []
    self.params = []
    self.labels = []
    self.n_outputs = structure[-1]

    self.train_set_images, self.train_set_labels = datasets[0]
    self.test_set_images, self.test_set_labels = datasets[1]

    if self.gui_worker:
        self.gui_worker.gui.status_message.emit("Normalizing cases...")
    else:
        print('----> Normalizing cases...')
    self.train_set_images = normalize(self.train_set_images)
    self.test_set_images = normalize(self.test_set_images)

    if self.gui_worker:
        self.gui_worker.gui.status_message.emit("Constructing the neural net...")
    else:
        print('----> Constructing the neural net...')

    self.input = T.dvector('input')
    self.label = T.dvector('label')
    input_to_next_layer = self.input

    # Create the layers
    for i in range(len(structure) - 1):
        a = (self.activation_function[i]
             if type(self.activation_function) is list
             else self.activation_function)
        if i < len(structure) - 2:
            self.layers.append(
                HiddenLayer(
                    self.random_feed,
                    _input=input_to_next_layer,
                    n_in=structure[i],
                    n_out=structure[i + 1],
                    activation=a
                )
            )
            input_to_next_layer = self.layers[i].output
        else:
            # create last layer
            self.layers.append(
                regression_layer(
                    _input=input_to_next_layer,
                    n_in=structure[i],
                    n_out=structure[i + 1],
                    activation=a
                )
            )

    self.error = self.layers[-1].error(self.label)
    for layer in self.layers:
        self.params += layer.params
    self.gradients = T.grad(self.error, self.params)
    self.updates = [
        (param, param - self.learning_rate * grad)
        for param, grad in zip(self.params, self.gradients)
    ]

    self.predictor = theano.function(
        [self.input], self.layers[-1].prediction
    )
    self.trainer = theano.function(
        [self.input, self.label],
        [self.layers[-1].prediction, self.error],
        updates=self.updates
    )
batch_size = 10
input_sentence_length = 88
hidden_dim = 13
attention_dim = 11
embeddings_dim = 17
output_voca_size = 134
# n_decodesteps = 100
# nb_passes = 3

input_sentences = T.tensor3("input_sentences", dtype=floatX)
encoder_output = T.tensor3("encoder_output", dtype=floatX)
# output_sentences = T.tensor3("output_sentences", dtype=floatX)

l_in = InputLayer(shape=(batch_size, input_sentence_length, embeddings_dim),
                  input_var=input_sentences)
layer = GRUDecoder(l_in, hidden_dim, attention_dim, output_voca_size)
output_sentences = layer.get_output_for([input_sentences, encoder_output])

fn = theano.function(
    [input_sentences, encoder_output],
    output_sentences,
    # mode='DebugMode',
    on_unused_input="ignore",
)

np_encoder_output = inputs = np.random.normal(
    size=(batch_size, input_sentence_length, hidden_dim)).astype(floatX)
np_input_sentences = np.random.normal(
    size=(batch_size, input_sentence_length, embeddings_dim)).astype(floatX)

np_output_sentences = fn(np_input_sentences, np_encoder_output)
print(np_output_sentences)
print(np_output_sentences.shape)