def test_infer_shape(self):
    admat = matrix()
    bdmat = matrix()
    admat_val = numpy.random.rand(3, 4).astype(config.floatX)
    bdmat_val = numpy.random.rand(3, 4).astype(config.floatX)
    self._compile_and_check([admat, bdmat], [SoftmaxGrad()(admat, bdmat)],
                            [admat_val, bdmat_val], SoftmaxGrad)
def test_min_informative_str():
    """ evaluates a reference output to make sure the
    min_informative_str function works as intended """
    A = tensor.matrix(name='A')
    B = tensor.matrix(name='B')
    C = A + B
    C.name = 'C'
    D = tensor.matrix(name='D')
    E = tensor.matrix(name='E')
    F = D + E
    G = C + F

    mis = min_informative_str(G).replace("\t", " ")

    reference = """A. Elemwise{add,no_inplace}
 B. C
 C. Elemwise{add,no_inplace}
  D. D
  E. E"""

    if mis != reference:
        print('--' + mis + '--')
        print('--' + reference + '--')

    assert mis == reference
def test_linear_regression():
    inpt = T.matrix('inpt')
    inpt.tag.test_value = np.zeros((3, 10))
    target = T.matrix('target')
    target.tag.test_value = np.zeros((3, 2))

    l = AffineNonlinear(inpt, 10, 2, 'tanh')
    loss = squared(target, l.output).sum(1).mean()

    m = SupervisedModel(inpt=inpt, target=target, output=l.output,
                        loss=loss, parameters=l.parameters)

    f_predict = m.function([m.inpt], m.output)
    f_loss = m.function([m.inpt, m.target], m.loss)

    X = np.zeros((20, 10))
    Z = np.zeros((20, 2))

    Y = f_predict(X)
    assert Y.shape == (20, 2), 'output has wrong shape'

    l = f_loss(X, Z)
    assert np.array(l).ndim == 0, 'loss is not a scalar'
def test_sequence_variable_inputs():
    x, y = tensor.matrix(), tensor.matrix()

    parallel_1 = Parallel(input_names=['input_1', 'input_2'],
                          input_dims=dict(input_1=4, input_2=5),
                          output_dims=dict(input_1=3, input_2=2),
                          prototype=Linear(),
                          weights_init=Constant(2), biases_init=Constant(1))
    parallel_2 = Parallel(input_names=['input_1', 'input_2'],
                          input_dims=dict(input_1=3, input_2=2),
                          output_dims=dict(input_1=5, input_2=4),
                          prototype=Linear(),
                          weights_init=Constant(2), biases_init=Constant(1))
    sequence = Sequence([parallel_1.apply, parallel_2.apply])
    sequence.initialize()

    new_x, new_y = sequence.apply(x, y)
    x_val = numpy.ones((4, 4), dtype=theano.config.floatX)
    y_val = numpy.ones((4, 5), dtype=theano.config.floatX)
    assert_allclose(
        new_x.eval({x: x_val}),
        (x_val.dot(2 * numpy.ones((4, 3))) + numpy.ones((4, 3))).dot(
            2 * numpy.ones((3, 5))) + numpy.ones((4, 5)))
    assert_allclose(
        new_y.eval({y: y_val}),
        (y_val.dot(2 * numpy.ones((5, 2))) + numpy.ones((4, 2))).dot(
            2 * numpy.ones((2, 4))) + numpy.ones((4, 4)))
def Z_LSTM(input_var, z_dim=256, nhid=512, layers=2, gradclip=10, training=True): ret = {} state_vars = [] ret['input'] = layer = nn.layers.InputLayer(input_var=input_var, shape=(None, None, z_dim)) batchsize, seqlen, _ = layer.input_var.shape for lay in xrange(layers): ret['drop_{}'.format(lay)] = layer = nn.layers.DropoutLayer(layer, p=0.3) if training: ret['lstm_{}'.format(lay)] = layer = LSTMSampleableLayer(layer, nhid, grad_clipping=gradclip, learn_init=True) else: cell_var = T.matrix('cell_var_{}'.format(lay)) hid_var = T.matrix('hid_var_{}'.format(lay)) state_vars.append(cell_var) state_vars.append(hid_var) ret['lstm_{}'.format(lay)] = layer = LSTMSampleableLayer(layer, nhid, cell_init=cell_var, hid_init=hid_var) ret['cell_{}'.format(lay)] = nn.layers.SliceLayer(layer, axis=2, indices=slice(None,nhid)) ret['hid_{}'.format(lay)] = layer = nn.layers.SliceLayer(layer, axis=2, indices=slice(nhid,None)) ret['reshape'] = layer = nn.layers.ReshapeLayer(layer, (-1, nhid)) ret['project'] = layer = nn.layers.DenseLayer(layer, num_units=z_dim, nonlinearity=None) ret['output'] = layer = nn.layers.ReshapeLayer(layer, (batchsize, seqlen, z_dim)) # final state slice layers for passing to next instance of lstm for lay in xrange(layers): ret['cellfinal_{}'.format(lay)] = nn.layers.SliceLayer(ret['cell_{}'.format(lay)], axis=1, indices=-1) ret['hidfinal_{}'.format(lay)] = nn.layers.SliceLayer(ret['hid_{}'.format(lay)], axis=1, indices=-1) return ret, state_vars
def initialise_model(self, X_train, y_train):
    print 'Initialising model...'
    self.input_shape = X_train.shape[1]
    input_var = T.matrix('inputs')
    target_var = T.matrix('targets')

    if self.normalise:
        y_train = self.normalise_y(y_train, reset=True)
        X_train = self.normalise_X(X_train, reset=True)

    # Create neural network model
    self.network = self.build_custom_mlp(input_var)

    prediction = lasagne.layers.get_output(self.network)
    loss = lasagne.objectives.squared_error(prediction, target_var)
    loss = loss.mean()

    params = lasagne.layers.get_all_params(self.network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(loss, params,
                                                learning_rate=self.learning_rate,
                                                momentum=self.momentum)

    test_prediction = lasagne.layers.get_output(self.network, deterministic=True)
    test_loss = lasagne.objectives.squared_error(test_prediction, target_var)
    test_loss = test_loss.mean()

    self.train_fn = theano.function([input_var, target_var], loss,
                                    updates=updates, allow_input_downcast=True)
    self.predict_output = theano.function([input_var], outputs=test_prediction,
                                          allow_input_downcast=True)
    self.initialised = True
def __init__(self, embedding_dim=100, num_hidden_layers=2, hidden_dim=200,
             in_dropout_p=0.2, hidden_dropout_p=0.5,
             update_hyperparams={'learning_rate': 0.01}):
    self.embedding_dim = embedding_dim
    self.num_hidden_layers = num_hidden_layers
    self.hidden_dim = hidden_dim
    self.in_dropout_p = in_dropout_p
    self.hidden_dropout_p = hidden_dropout_p

    print >> sys.stderr, 'Building computation graph for discriminator...'
    self.input_var = T.matrix('input')
    self.target_var = T.matrix('target')

    self.l_in = lasagne.layers.InputLayer(shape=(None, self.embedding_dim),
                                          input_var=T.tanh(self.input_var),
                                          name='l_in')
    self.l_in_dr = lasagne.layers.DropoutLayer(self.l_in, self.in_dropout_p)
    self.layers = [self.l_in, self.l_in_dr]
    for i in xrange(self.num_hidden_layers):
        l_hid = lasagne.layers.batch_norm(lasagne.layers.DenseLayer(
            self.layers[-1], num_units=self.hidden_dim,
            nonlinearity=lasagne.nonlinearities.leaky_rectify,
            W=lasagne.init.GlorotUniform(gain=leaky_relu_gain),
            name=('l_hid_%s' % i)))
        l_hid_dr = lasagne.layers.DropoutLayer(l_hid, self.hidden_dropout_p)
        self.layers.append(l_hid)
        self.layers.append(l_hid_dr)
    self.l_preout = lasagne.layers.batch_norm(lasagne.layers.DenseLayer(
        self.layers[-1], num_units=1, nonlinearity=None, name='l_preout'))
    self.l_out = lasagne.layers.NonlinearityLayer(
        self.l_preout, nonlinearity=lasagne.nonlinearities.sigmoid, name='l_out')

    self.prediction = lasagne.layers.get_output(self.l_out)
    self.loss = lasagne.objectives.binary_crossentropy(self.prediction,
                                                       self.target_var).mean()
    self.accuracy = T.eq(T.ge(self.prediction, 0.5), self.target_var).mean()

    self.params = lasagne.layers.get_all_params(self.l_out, trainable=True)
    self.updates = lasagne.updates.adam(self.loss, self.params,
                                        **update_hyperparams)

    print >> sys.stderr, 'Compiling discriminator...'
    self.train_fn = theano.function([self.input_var, self.target_var],
                                    [self.loss, self.accuracy],
                                    updates=self.updates)
    self.eval_fn = theano.function([self.input_var, self.target_var],
                                   [self.loss, self.accuracy])
def __init__(self, input_layer, loss_function=mse, aggregation='mean'):
    """
    Constructor

    :parameters:
        - input_layer : a `Layer` whose output is the network's prediction
            given its input
        - loss_function : a loss function of the form `f(x, t, m)` that
            returns a scalar loss given tensors that represent the
            predicted values, true values and mask as arguments
        - aggregation : either:
            - `None` or `'mean'` : the elements of the loss will be
                multiplied by the mask and the mean returned
            - `'sum'` : the elements of the loss will be multiplied by
                the mask and the sum returned
            - `'normalized_sum'` : the elements of the loss will be
                multiplied by the mask, summed and divided by the sum of
                the mask
    """
    self.input_layer = input_layer
    self.loss_function = loss_function
    self.target_var = T.matrix("target")
    self.mask_var = T.matrix("mask")
    if aggregation not in self._valid_aggregation:
        raise ValueError('aggregation must be \'mean\', \'sum\', '
                         '\'normalized_sum\' or None, '
                         'not {0}'.format(aggregation))
    self.aggregation = aggregation
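# Illustration (not part of the class above): a minimal sketch of how the
# three aggregation modes described in the docstring combine an element-wise
# loss with a mask. Variable names here are purely illustrative.
import theano
import theano.tensor as T

elemwise_loss = T.matrix('elemwise_loss')
mask = T.matrix('mask')
masked = elemwise_loss * mask
mean_agg = masked.mean()                  # 'mean' (or None)
sum_agg = masked.sum()                    # 'sum'
norm_sum_agg = masked.sum() / mask.sum()  # 'normalized_sum'
f_agg = theano.function([elemwise_loss, mask],
                        [mean_agg, sum_agg, norm_sum_agg])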
def init_variables(self):
    self.input_var = T.matrix('inputs')
    self.side_var = T.matrix('contexts')
    # do regression
    # self.target_var = T.ivector('targets')
    self.target_var = T.vector('targets')
    self.num_classes = 1  # regression -> dim matters, not classes
def __init__(self, dnodex,dim): X = T.matrix() Y = T.matrix() eta = T.scalar() temperature=T.scalar() num_input = len(format(dnodex.npoi,'b')) num_hidden = dim num_output = len(format(dnodex.npoi,'b')) inputs = InputLayer(X, name="inputs") lstm1 = LSTMLayer(num_input, num_hidden, input_layer=inputs, name="lstm1") lstm2 = LSTMLayer(num_hidden, num_hidden, input_layer=lstm1, name="lstm2") #lstm3 = LSTMLayer(num_hidden, num_hidden, input_layer=lstm2, name="lstm3") softmax = SoftmaxLayer(num_hidden, num_output, input_layer=lstm2, name="yhat", temperature=temperature) Y_hat = softmax.output() self.layers = inputs, lstm1, lstm2, softmax params = get_params(self.layers) caches = make_caches(params) cost = T.mean(T.nnet.categorical_crossentropy(Y_hat, Y)) updates = momentum(cost, params, caches, eta) self.train = theano.function([X, Y, eta, temperature], cost, updates=updates, allow_input_downcast=True) predict_updates = one_step_updates(self.layers) self.predict_char = theano.function([X, temperature], Y_hat, updates=predict_updates, allow_input_downcast=True)
def fine_train(nn, datasets, learning_Rate, batch_sizes, epochs):
    train_set_x, train_set_y = datasets[0]
    n_batches = train_set_x.get_value(borrow=True).shape[0] / batch_sizes
    train_label = T.cast(train_set_y, 'float64')
    index = T.lscalar()
    x = T.matrix('x')
    y = T.matrix('y')
    min_batch_cost = []

    if nn is None:
        mynn = ForwordNN(x, y, n_in, n_out, hidden_sizes)
    else:
        mynn = nn
    cost, update = mynn.get_cost_update(x, y, learning_Rate)

    train_nn = theano.function(
        [index], cost, updates=update,
        givens={
            x: train_set_x[index * batch_sizes:(index + 1) * batch_sizes, :],
            y: train_label[index * batch_sizes:(index + 1) * batch_sizes, :]
        }
    )

    for num_epochs in range(epochs):
        t1 = time.time()
        for num_batch in xrange(n_batches):
            min_batch_cost.append(train_nn(num_batch))
        t2 = time.time()
        print 'The %d/%dth training, takes %f seconds, cost is %f' % (num_epochs + 1, epochs, (t2 - t1), np.mean(min_batch_cost))
    return mynn
def __init__(self, model, cost, monitoring_dataset, batch_size):
    """
    Parameters
    ----------
    model : pylearn2.models.model.Model
        the model whose best parameters we want to keep track of
    cost : tensor_like
        cost function used to evaluate the model's performance
    monitoring_dataset : pylearn2.datasets.dataset.Dataset
        dataset on which to compute the cost
    batch_size : int
        size of the batches used to compute the cost
    """
    self.model = model
    self.cost = cost
    self.dataset = monitoring_dataset
    self.batch_size = batch_size

    self.minibatch = T.matrix('minibatch')
    self.target = T.matrix('target')

    if cost.supervised:
        self.supervised = True
        self.cost_function = theano.function(
            inputs=[self.minibatch, self.target],
            outputs=cost(model, self.minibatch, self.target))
    else:
        self.supervised = False
        self.cost_function = theano.function(
            inputs=[self.minibatch],
            outputs=cost(model, self.minibatch))

    self.best_cost = numpy.inf
    self.best_params = model.get_param_values()
def set_generation_function(recurrent_model, output_model):
    # set input data (1*num_samples*features)
    input_data = tensor.matrix(name='input_seq', dtype=floatX)
    # set init hidden/cell (num_samples*hidden_size)
    prev_hidden_data = tensor.matrix(name='prev_hidden_data', dtype=floatX)
    prev_cell_data = tensor.matrix(name='prev_cell_data', dtype=floatX)

    # get hidden data
    recurrent_data = get_tensor_output(input=[input_data, prev_hidden_data, prev_cell_data],
                                       layers=recurrent_model,
                                       is_training=False)
    cur_hidden_data = recurrent_data[0]
    cur_cell_data = recurrent_data[1]

    # get prediction data
    output_data = get_tensor_output(input=cur_hidden_data,
                                    layers=output_model,
                                    is_training=False)

    # input data
    generation_function_inputs = [input_data, prev_hidden_data, prev_cell_data]
    generation_function_outputs = [cur_hidden_data, cur_cell_data, output_data]

    generation_function = theano.function(inputs=generation_function_inputs,
                                          outputs=generation_function_outputs,
                                          on_unused_input='ignore')
    return generation_function
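# Hypothetical usage of the compiled step function above, sampling a short
# sequence one frame at a time; the sizes, the zero initial state, and the
# model arguments are illustrative assumptions, not taken from the original code.
import numpy as np

generation_function = set_generation_function(recurrent_model, output_model)
num_samples, hidden_size, num_features = 1, 128, 20
hidden = np.zeros((num_samples, hidden_size), dtype='float32')
cell = np.zeros((num_samples, hidden_size), dtype='float32')
frame = np.zeros((num_samples, num_features), dtype='float32')
for t in range(10):
    hidden, cell, frame = generation_function(frame, hidden, cell)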
def test_compute_flag(self):
    orig_compute_test_value = theano.config.compute_test_value
    try:
        x = T.matrix('x')
        y = T.matrix('y')
        y.tag.test_value = numpy.random.rand(4, 5).astype(config.floatX)

        # should skip computation of test value
        theano.config.compute_test_value = 'off'
        z = T.dot(x, y)
        assert not hasattr(z.tag, 'test_value')

        # should fail when asked by user
        theano.config.compute_test_value = 'raise'
        self.assertRaises(ValueError, T.dot, x, y)

        # test that a warning is raised if required
        theano.config.compute_test_value = 'warn'
        warnings.simplefilter('error', UserWarning)
        try:
            self.assertRaises(UserWarning, T.dot, x, y)
        finally:
            # Restore the default behavior.
            # TODO There is a cleaner way to do this in Python 2.6, once
            # Theano drops support of Python 2.4 and 2.5.
            warnings.simplefilter('default', UserWarning)
    finally:
        theano.config.compute_test_value = orig_compute_test_value
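# For context, a small self-contained example of the feature this test
# exercises: with compute_test_value enabled, intermediate results are
# evaluated eagerly against the tagged test values, so shape errors surface
# at graph-construction time.
import numpy
import theano
import theano.tensor as T

theano.config.compute_test_value = 'warn'
a = T.matrix('a')
a.tag.test_value = numpy.zeros((3, 4), dtype=theano.config.floatX)
b = T.matrix('b')
b.tag.test_value = numpy.zeros((4, 5), dtype=theano.config.floatX)
c = T.dot(a, b)                 # evaluated on the test values right away
print(c.tag.test_value.shape)   # (3, 5)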
def test_string_var(self):
    orig_compute_test_value = theano.config.compute_test_value
    try:
        theano.config.compute_test_value = 'raise'

        x = T.matrix('x')
        x.tag.test_value = numpy.random.rand(3, 4).astype(config.floatX)
        y = T.matrix('y')
        y.tag.test_value = numpy.random.rand(4, 5).astype(config.floatX)
        z = theano.shared(numpy.random.rand(5, 6).astype(config.floatX))

        # should work
        out = T.dot(T.dot(x, y), z)
        assert hasattr(out.tag, 'test_value')
        tf = theano.function([x, y], out)
        assert _allclose(tf(x.tag.test_value, y.tag.test_value),
                         out.tag.test_value)

        def f(x, y, z):
            return T.dot(T.dot(x, y), z)

        # this test should fail
        z.set_value(numpy.random.rand(7, 6).astype(config.floatX))
        self.assertRaises(ValueError, f, x, y, z)
    finally:
        theano.config.compute_test_value = orig_compute_test_value
def _construct_compute_fe_terms(self): """ Construct a function for computing terms in variational free energy. """ # setup some symbolic variables for theano to deal with xi = T.matrix() xo = T.matrix() _, hi_zmuv = self._construct_zmuv_samples(xi, 1) # construct values to output nll = self.nlli[-1] kld = self.kld_z.flatten() + self.kld_hi_q2p.flatten() # compile theano function for a one-sample free-energy estimate fe_term_sample = theano.function(inputs=[ xi, xo ], \ outputs=[nll, kld], \ givens={self.x_in: xi, \ self.x_out: xo, \ self.hi_zmuv: hi_zmuv}, \ updates=self.scan_updates) # construct a wrapper function for multi-sample free-energy estimate def fe_term_estimator(XI, XO, sample_count): # compute a multi-sample estimate of variational free-energy nll_sum = np.zeros((XI.shape[0],)) kld_sum = np.zeros((XI.shape[0],)) for i in range(sample_count): result = fe_term_sample(XI, XO) nll_sum += result[0].ravel() kld_sum += result[1].ravel() mean_nll = nll_sum / float(sample_count) mean_kld = kld_sum / float(sample_count) return [mean_nll, mean_kld] return fe_term_estimator
def _construct_sample_from_prior(self): """ Construct a function for drawing independent samples from the distribution generated by this MultiStageModel. This function returns the full sequence of "partially completed" examples. """ z_sym = T.matrix() x_sym = T.matrix() irs = self.ir_steps oputs = [self.obs_transform(self.s0)] oputs.extend([self.obs_transform(self.si[i]) for i in range(irs)]) _, hi_zmuv = self._construct_zmuv_samples(x_sym, 1) sample_func = theano.function(inputs=[z_sym, x_sym], outputs=oputs, \ givens={ self.z: z_sym, \ self.x_in: T.zeros_like(x_sym), \ self.x_out: T.zeros_like(x_sym), \ self.hi_zmuv: hi_zmuv }, \ updates=self.scan_updates) def prior_sampler(samp_count): x_samps = to_fX( np.zeros((samp_count, self.obs_dim)) ) old_switch = self.train_switch.get_value(borrow=False) # set model to generation mode self.set_train_switch(switch_val=0.0) z_samps = to_fX( npr.randn(samp_count, self.z_dim) ) model_samps = sample_func(z_samps, x_samps) # set model back to either training or generation mode self.set_train_switch(switch_val=old_switch) return model_samps return prior_sampler
def test_wrong_dims(self):
    a = tt.matrix()
    increment = tt.matrix()
    index = 0
    self.assertRaises(TypeError, tt.set_subtensor, a[index], increment)
    self.assertRaises(TypeError, tt.inc_subtensor, a[index], increment)
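# For contrast with the failure cases above, a correctly-dimensioned update:
# indexing a matrix row yields a vector, so the increment must be a vector.
# This is a standalone sketch, not part of the test class.
import numpy as np
import theano
import theano.tensor as tt

a = tt.matrix()
increment = tt.vector()
new_a = tt.inc_subtensor(a[0], increment)
f = theano.function([a, increment], new_a)
f(np.zeros((2, 3), dtype=theano.config.floatX),
  np.ones(3, dtype=theano.config.floatX))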
def __init__(self,rng=None,theano_rng=None,n_in=121,hidden_layers_sizes=[400,400,400],n_hidden=6,n_out=1): self.dA_layers = [] self.sigmoid_layers = [] self.params = [] self.n_layers = len(hidden_layers_sizes) assert self.n_layers > 0 if not theano_rng: theano_rng = RandomStreams(rng.randint(2**30)) self.x = T.matrix('x') self.y = T.matrix('y') for i in xrange(self.n_layers): if i == 0: input_size = n_in layer_input = self.x else: input_size = hidden_layers_sizes[i-1] layer_input = self.sigmoid_layers[-1].output sigmoid_layer = regressionLayer(rng=rng,input=layer_input,n_in=input_size,n_out=hidden_layers_sizes[i],activation=T.tanh) self.sigmoid_layers.append(sigmoid_layer) self.params.extend(sigmoid_layer.params) dA_layer = daLayer(rng=rng,theano_rng=theano_rng,input=layer_input,n_in=input_size,n_hidden=hidden_layers_sizes[i],W=sigmoid_layer.W,bhid=sigmoid_layer.b,activation=T.tanh) self.dA_layers.append(dA_layer) self.reg_layer1 = regressionLayer(rng=rng,input=self.sigmoid_layers[-1].output,n_in=hidden_layers_sizes[-1],n_out=n_hidden) self.reg_layer2 = regressionLayer(rng=rng,input=self.reg_layer1.output,n_in=n_hidden,n_out=n_out) self.params.extend(self.reg_layer1.params) self.params.extend(self.reg_layer2.params) self.output = self.reg_layer2.output self.errors = T.mean((self.output-self.y)**2)
def build_model(tparams, options):
    trng = RandomStreams(SEED)

    # Used for dropout.
    use_noise = theano.shared(numpy_floatX(0.))

    x = tensor.matrix('x', dtype='int64')
    mask = tensor.matrix('mask', dtype=config.floatX)
    y = tensor.vector('y', dtype='int64')

    n_timesteps = x.shape[0]
    n_samples = x.shape[1]

    emb = tparams['Wemb'][x.flatten()].reshape([n_timesteps,
                                                n_samples,
                                                options['dim_proj']])
    proj = get_layer(options['encoder'])[1](tparams, emb, options,
                                            prefix=options['encoder'],
                                            mask=mask)
    if options['encoder'] == 'lstm':
        proj = (proj * mask[:, :, None]).sum(axis=0)
        proj = proj / mask.sum(axis=0)[:, None]
    if options['use_dropout']:
        proj = dropout_layer(proj, use_noise, trng)

    pred = tensor.nnet.softmax(tensor.dot(proj, tparams['U']) + tparams['b'])

    f_pred_prob = theano.function([x, mask], pred, name='f_pred_prob')
    f_pred = theano.function([x, mask], pred.argmax(axis=1), name='f_pred')

    cost = -tensor.log(pred[tensor.arange(n_samples), y] + 1e-8).mean()

    return use_noise, x, mask, y, f_pred_prob, f_pred, cost
def rebuild_nn(nn_params):
    W_e, W_p, W_o, b_o = read_obj(nn_params, 4)
    mlp = MLPNoHid(W_e.get_value(), W_p.get_value(),
                   W_o.get_value(), b_o.get_value())
    wx = T.matrix('word', dtype='int32')
    px = T.matrix('POS', dtype='int32')
    f_pred = theano.function([wx, px], mlp.output(wx, px))
    return f_pred
def test_pdf_compare_logpdf():
    theano.config.compute_test_value = 'raise'

    sample = T.matrix()
    sample.tag.test_value = np.random.random((10, 5)).astype(theano.config.floatX)
    mean = T.vector()
    mean.tag.test_value = np.empty(5).astype(theano.config.floatX)
    cov = T.matrix()
    cov.tag.test_value = np.random.random((5, 5)).astype(theano.config.floatX)

    density = mvn.pdf(sample, mean, cov)
    log_density = mvn.logpdf(sample, mean, cov)

    f_density = theano.function([sample, mean, cov], density)
    f_logdensity = theano.function([sample, mean, cov], log_density)

    some_sample = np.random.random((20, 5)).astype(theano.config.floatX)
    some_mean = np.array([1., 2., 3., 4., 5.]).astype(theano.config.floatX)
    w = np.random.random((5, 5)).astype(theano.config.floatX)
    some_cov = np.dot(w, w.T) + np.eye(5).astype(theano.config.floatX)

    d = f_density(some_sample, some_mean, some_cov)
    log_d = f_logdensity(some_sample, some_mean, some_cov)

    assert np.allclose(np.log(d), log_d)
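# A rough NumPy reference for the identity the test checks, i.e. that
# mvn.logpdf agrees with log(mvn.pdf); the closed form below is the standard
# multivariate normal log-density and is given here only for illustration.
import numpy as np

def reference_logpdf(sample, mean, cov):
    d = mean.shape[0]
    diff = sample - mean
    # Mahalanobis term per row: diff_i^T cov^{-1} diff_i
    maha = np.einsum('ij,jk,ik->i', diff, np.linalg.inv(cov), diff)
    _, logdet = np.linalg.slogdet(cov)
    return -0.5 * (d * np.log(2 * np.pi) + logdet + maha)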
def build_model(self): ###################### # BUILD ACTUAL MODEL # ###################### logger.info('... building the model') U, W, V, bh, by = self.U, self.W, self.V, self.bh, self.by x = T.matrix('x') y = T.matrix('y') def forward_prop_step(x_t, s_tm1, U, W, bh): s_t = self.activation(T.dot(U, x_t) + T.dot(W, s_tm1) + bh) return s_t s, _ = theano.scan( forward_prop_step, sequences=x, outputs_info=[dict(initial=T.zeros(self.hidden_dim))], non_sequences=[U, W, bh], mode='DebugMode') p_y = T.nnet.softmax(T.dot(self.V, s[-1]) + by) prediction = T.argmax(p_y, axis=1) o_error = T.sum(T.nnet.categorical_crossentropy(p_y, y)) self.cost = o_error + self.L1_reg * self.L1 + self.L2_reg * self.L2_sqr # Assign functions self.forward_propagation = theano.function([x], s[-1]) self.predict = theano.function([x], prediction) self.ce_error = theano.function([x, y], o_error) l_r = T.scalar('l_r', dtype=theano.config.floatX) # learning rate (may change) mom = T.scalar('mom', dtype=theano.config.floatX) # momentum self.bptt, self.f_update = self.Momentum(x, y, l_r, mom)
def est_both_assert_merge_2_reverse(self): # Test case "test_both_assert_merge_2" but in reverse order x1 = T.matrix('x1') x2 = T.matrix('x2') x3 = T.matrix('x3') e = T.dot(x1, T.opt.assert_op(x2, (x2 > x3).all())) +\ T.dot(T.opt.assert_op(x1, (x1 > x3).all()), x2) g = FunctionGraph([x1, x2, x3], [e]) MergeOptimizer().optimize(g) strg = theano.printing.debugprint(g, file='str') strref = '''Elemwise{add,no_inplace} [@A] '' 7 |dot [@B] '' 6 | |Assert{msg='Theano Assert failed!'} [@C] '' 5 | | |x1 [@D] | | |All [@E] '' 3 | | |Elemwise{gt,no_inplace} [@F] '' 1 | | |x1 [@D] | | |x3 [@G] | |Assert{msg='Theano Assert failed!'} [@H] '' 4 | |x2 [@I] | |All [@J] '' 2 | |Elemwise{gt,no_inplace} [@K] '' 0 | |x2 [@I] | |x3 [@G] |dot [@B] '' 6 ''' print(strg) assert strg == strref, (strg, strref)
def get_model(Ws, bs, dropout=False):
    v = T.matrix('input')
    m = T.matrix('missing')
    q = T.matrix('target')
    k = T.vector('normalization factor')

    # Set all missing/target values to 0.5
    keep_mask = (1 - m) * (1 - q)
    h = keep_mask * (v * 2 - 1)  # Convert to +1, -1

    # Normalize layer 0
    h *= k.dimshuffle(0, 'x')

    for l in xrange(len(Ws)):
        h = T.dot(h, Ws[l]) + bs[l]

        if l < len(Ws) - 1:
            h = h * (h > 0)  # relu
            if dropout:
                mask = srng.binomial(n=1, p=0.5, size=h.shape)
                h = h * mask * 2

    output = sigmoid(h)
    LL = v * T.log(output) + (1 - v) * T.log(1 - output)
    # loss = -(q * LL).sum() / q.sum()
    loss = -((1 - m) * LL).sum() / (1 - m).sum()

    return v, m, q, k, output, loss
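# Hypothetical usage of the symbols returned above; the layer sizes are
# arbitrary and get_model is assumed to see its module-level globals
# (sigmoid, srng) as in the original code.
import numpy as np
import theano

rng = np.random.RandomState(0)
Ws = [theano.shared(rng.randn(8, 4).astype(theano.config.floatX)),
      theano.shared(rng.randn(4, 8).astype(theano.config.floatX))]
bs = [theano.shared(np.zeros(4, dtype=theano.config.floatX)),
      theano.shared(np.zeros(8, dtype=theano.config.floatX))]

v, m, q, k, output, loss = get_model(Ws, bs, dropout=False)
f_predict = theano.function([v, m, q, k], output, allow_input_downcast=True)
f_loss = theano.function([v, m, q, k], loss, allow_input_downcast=True)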
def test_hgemm_swap():
    from theano.sandbox.cuda import nvcc_compiler
    if nvcc_compiler.nvcc_version < '7.5':
        raise SkipTest("SgemmEx is only available on cuda 7.5+")

    v = tensor.vector(dtype='float16')
    m = tensor.matrix(dtype='float16')
    m2 = tensor.matrix(dtype='float16')
    m32 = tensor.matrix(dtype='float32')

    # test that we don't try to replace anything but matrix x matrix in float16
    f = theano.function([v, m], tensor.dot(v, m), mode=mode_with_gpu)
    assert len([node for node in f.maker.fgraph.apply_nodes
                if isinstance(node.op, GpuGemm)]) == 0

    f = theano.function([m32, m], tensor.dot(m32, m), mode=mode_with_gpu)
    assert len([node for node in f.maker.fgraph.apply_nodes
                if isinstance(node.op, GpuGemm)]) == 0

    f = theano.function([m, m2], tensor.dot(m, m2), mode=mode_with_gpu)
    assert len([node for node in f.maker.fgraph.apply_nodes
                if isinstance(node.op, GpuGemm)]) == 1

    v1 = numpy.random.random((3, 4)).astype('float16')
    v2 = numpy.random.random((4, 2)).astype('float16')
    of = f(v1, v2)
    on = numpy.dot(v1, v2)
    utt.assert_allclose(of, on)
def _setup_vars(self, sparse_input):
    '''Setup Theano variables for our network.

    Parameters
    ----------
    sparse_input : bool
        Not used -- sparse inputs are not supported for recurrent networks.

    Returns
    -------
    vars : list of theano variables
        A list of the variables that this network requires as inputs.
    '''
    _warn_dimshuffle()
    assert not sparse_input, 'Theanets does not support sparse recurrent models!'

    self.src = TT.ftensor3('src')
    # self.src_mask = TT.imatrix('src_mask')
    self.src_mask = TT.matrix('src_mask')
    self.dst = TT.ftensor3('dst')
    self.labels = TT.imatrix('labels')
    self.weights = TT.matrix('weights')

    if self.weighted:
        return [self.src, self.src_mask, self.dst, self.labels, self.weights]
    return [self.src, self.dst]
def funcs(dataset, network, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE,
          momentum=MOMENTUM, alpha=L2_CONSTANT):
    """
    Method that returns the theano functions that are used in
    training and testing. These are the train and predict functions.
    The predict function returns the output of the network.
    """
    # symbolic variables
    X_batch = T.matrix()
    y_batch = T.matrix()

    # this is the cost of the network when fed through the noisy network
    l2 = lasagne.regularization.l2(X_batch)
    train_output = lasagne.layers.get_output(network, X_batch)
    cost = lasagne.objectives.mse(train_output, y_batch)
    cost = cost.mean()  # + alpha*l2

    # test the performance of the network without noise
    test = lasagne.layers.get_output(network, X_batch, deterministic=True)
    pred = T.argmax(test, axis=1)
    accuracy = T.mean(T.eq(pred, y_batch), dtype=theano.config.floatX)

    all_params = lasagne.layers.get_all_params(network)
    updates = lasagne.updates.nesterov_momentum(cost, all_params,
                                                learning_rate, momentum)

    train = theano.function(inputs=[X_batch, y_batch], outputs=cost,
                            updates=updates, allow_input_downcast=True)
    valid = theano.function(inputs=[X_batch, y_batch], outputs=cost,
                            allow_input_downcast=True)
    predict = theano.function(inputs=[X_batch], outputs=pred,
                              allow_input_downcast=True)

    return dict(
        train=train,
        valid=valid,
        predict=predict
    )
def __init__(self, n_in, n_out, n_h, learning_rate=0.12): self.x = T.matrix(dtype=theano.config.floatX) # @UndefinedVariable self.target = T.matrix(dtype=theano.config.floatX) # @UndefinedVariable bound_x = numpy.sqrt(6. / (n_in + n_h)) bound_h = numpy.sqrt(6. / (n_h + n_h)) self.params = [] self.w_x = theano.shared(np.array(np.random.uniform(low=-bound_x, high=bound_x, size=(n_in, n_h)), dtype=theano.config.floatX)) # @UndefinedVariable self.params.append(self.w_x) self.w_h = theano.shared(np.array(np.random.uniform(low=-bound_h, high=bound_h, size=(n_h, n_h)), dtype=theano.config.floatX)) # @UndefinedVariable self.params.append(self.w_h) self.b_h = theano.shared(np.array(np.random.uniform(low=-bound_h, high=bound_h, size=(n_h,)), dtype=theano.config.floatX)) # @UndefinedVariable self.params.append(self.b_h) self.w = theano.shared(np.array(np.random.uniform(low=-bound_h, high=bound_h, size=(n_h, n_out)), dtype=theano.config.floatX)) # @UndefinedVariable self.params.append(self.w) self.b = theano.shared(np.array(np.random.uniform(low=-bound_h, high=bound_h, size=(n_out,)), dtype=theano.config.floatX)) # @UndefinedVariable self.params.append(self.b) self.h0 = theano.shared(np.array(np.random.uniform(low=-bound_x, high=bound_x, size=(n_h,)), dtype=theano.config.floatX)) # @UndefinedVariable self.params.append(self.h0) def one_step(x, h1): h = T.nnet.sigmoid(T.dot(x, self.w_x) + T.dot(h1, self.w_h) + self.b_h) y = T.nnet.sigmoid(T.dot(h, self.w) + self.b) return h, y [hs, ys], _ = theano.scan(fn=one_step, sequences=self.x, outputs_info=[self.h0, None]) cost = -T.mean(self.target * T.log(ys) + (1 - self.target) * T.log(1 - ys)) grads = T.grad(cost, self.params) updates = [(param, param - learning_rate * grad) for param, grad in zip(self.params, grads)] self.train = theano.function([self.x, self.target], cost, updates=updates) self.predict = theano.function([self.x], ys)
def test_one_step(self):
    h0 = tensor.matrix('h0')
    c0 = tensor.matrix('c0')
    x = tensor.matrix('x')
    h1, c1 = self.lstm.apply(x, h0, c0, iterate=False)
    next_h = theano.function(inputs=[x, h0, c0], outputs=[h1])

    h0_val = 0.1 * numpy.array([[1, 1, 0], [0, 1, 1]],
                               dtype=theano.config.floatX)
    c0_val = 0.1 * numpy.array([[1, 1, 0], [0, 1, 1]],
                               dtype=theano.config.floatX)
    x_val = 0.1 * numpy.array([range(12), range(12, 24)],
                              dtype=theano.config.floatX)
    W_state_val = 2 * numpy.ones((3, 12), dtype=theano.config.floatX)
    W_cell_to_in = 2 * numpy.ones((3,), dtype=theano.config.floatX)
    W_cell_to_out = 2 * numpy.ones((3,), dtype=theano.config.floatX)
    W_cell_to_forget = 2 * numpy.ones((3,), dtype=theano.config.floatX)

    # omitting biases because they are zero
    activation = numpy.dot(h0_val, W_state_val) + x_val

    def sigmoid(x):
        return 1. / (1. + numpy.exp(-x))

    i_t = sigmoid(activation[:, :3] + c0_val * W_cell_to_in)
    f_t = sigmoid(activation[:, 3:6] + c0_val * W_cell_to_forget)
    next_cells = f_t * c0_val + i_t * numpy.tanh(activation[:, 6:9])
    o_t = sigmoid(activation[:, 9:12] + next_cells * W_cell_to_out)
    h1_val = o_t * numpy.tanh(next_cells)

    assert_allclose(h1_val, next_h(x_val, h0_val, c0_val)[0], rtol=1e-6)
reg_cost = lib.ops.kl_unit_gaussian.kl_unit_gaussian(
    mu, log_sigma
).sum(axis=1)

alpha = T.minimum(
    1,
    T.cast(total_iters, theano.config.floatX) / lib.floatX(ALPHA_ITERS)
)

if VANILLA:
    cost = reconst_cost
else:
    cost = reconst_cost + (alpha * reg_cost)

sample_fn_latents = T.matrix('sample_fn_latents')
sample_fn = theano.function(
    [sample_fn_latents, images],
    T.nnet.sigmoid(decode_algo(sample_fn_latents, images)),
    on_unused_input='warn'
)

eval_fn = theano.function(
    [images, total_iters],
    cost.mean()
)

train_data, dev_data, test_data = lib.mnist_binarized.load(
    BATCH_SIZE, TEST_BATCH_SIZE
)
def shuffle_data(samples, labels):
    idx = np.arange(samples.shape[0])
    np.random.shuffle(idx)
    # print (samples.shape, labels.shape)
    samples, labels = samples[idx], labels[idx]
    return samples, labels


decay = 1e-6
learning_rate = 0.01
epochs = 1000

# theano expressions
X = T.matrix()  # features
Y = T.matrix()  # output

w1, b1 = create_weights(36, 10), create_bias(10)  # weights and biases from input to hidden layer
w2, b2 = create_weights(10, 6, logistic=False), create_bias(6)  # weights and biases from hidden to output layer

h1 = T.nnet.sigmoid(T.dot(X, w1) + b1)
py = T.nnet.softmax(T.dot(h1, w2) + b2)
y_x = T.argmax(py, axis=1)

cost = T.mean(T.nnet.categorical_crossentropy(py, Y)) + \
    decay * (T.sum(T.sqr(w1)) + T.sum(T.sqr(w2)))

params = [w1, b1, w2, b2]
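# A minimal continuation sketch (assuming the symbols defined above): plain
# gradient-descent updates plus the usual train/predict functions. This is
# illustrative and not necessarily how the original script proceeds.
import theano

grads = T.grad(cost, params)
updates = [(p, p - learning_rate * g) for p, g in zip(params, grads)]
train = theano.function(inputs=[X, Y], outputs=cost, updates=updates,
                        allow_input_downcast=True)
predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True)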
def trainword(keyword, window_radius = 3, learning_rate = 0.1, n_epochs = 10,batch_size = 1,filter_height=3,filter_width = 50, pool_height=1,pool_width = 1, loginput_num = 50, vector_size = 50): print '==training parameters==' print 'window_radius: '+str(window_radius) print 'vector_size: '+str(vector_size) print 'filter_height: '+str(filter_height) print 'filter_width: '+str(filter_width) print 'pool_height: '+str(pool_height) print 'pool_width: '+str(pool_width) print 'loginput_num: '+str(loginput_num) print 'learning_rate: '+str(learning_rate) print 'n_epochs: '+str(n_epochs) print 'batch_size: '+str(batch_size) rng = numpy.random.RandomState(23455) datasets = load_data_word(keyword, window_radius, vector_size) train_set_x, train_set_y, trainsentence = datasets[0][0] valid_set_x, valid_set_y, validsentence = datasets[0][1] test_set_x, test_set_y, testsentence = datasets[0][2] senselist = datasets[1] n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_train_batches /= batch_size n_valid_batches /= batch_size n_test_batches /= batch_size print n_train_batches, n_valid_batches, n_test_batches index = T.lscalar() x = T.matrix('x') y = T.ivector('y') print '... building the model for '+keyword layer0_input = x.reshape((batch_size, 1, 2*window_radius+1, vector_size)) layer0 = WsdConvPoolLayer( rng, input=layer0_input, image_shape=(batch_size, 1, 2*window_radius+1, vector_size), filter_shape=(1, 1, filter_height, filter_width), poolsize=(pool_height, pool_width) ) layer1_input = layer0.output.flatten(2) #layer1_input = layer0_input.flatten(2) layer1 = HiddenLayer( rng, input=layer1_input, #n_in=(2*window_radius+1)*(vector_size+1-filter_width+1-pool_width), n_in=int((2*window_radius+2-filter_height)/float(pool_height))*int((vector_size+1-filter_width)/float(pool_width)), n_out=loginput_num, activation=T.tanh ) layer2 = LogisticRegression(input=layer1_input, n_in=int((2*window_radius+2-filter_height)/float(pool_height))*int((vector_size+1-filter_width)/float(pool_width)), n_out=20) cost = layer2.negative_log_likelihood(y) test_model = theano.function( [index], layer2.errors(y), givens={ x: test_set_x[index * batch_size: (index + 1) * batch_size], y: test_set_y[index * batch_size: (index + 1) * batch_size] } ) validate_model = theano.function( [index], layer2.errors(y), givens={ x: valid_set_x[index * batch_size: (index + 1) * batch_size], y: valid_set_y[index * batch_size: (index + 1) * batch_size] } ) output_size = theano.function( [index], [layer0.output.shape], givens={ x: test_set_x[index * batch_size: (index + 1) * batch_size] } ) output_model = theano.function( [index], [layer2.y_pred], givens={ x: valid_set_x[index * batch_size: (index + 1) * batch_size] } ) output_test = theano.function( [index], [layer2.y_pred], givens={ x: test_set_x[index * batch_size: (index + 1) * batch_size] } ) params = layer2.params + layer0.params grads = T.grad(cost, params) updates = [ (param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads) ] train_model = theano.function( [index], cost, updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size] } ) print '... 
training' # early-stopping parameters patience = 10000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_validation_loss = numpy.inf best_params = 0 best_iter = 0 test_score = 0. start_time = time.clock() epoch = 0 done_looping = False while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): iter = (epoch - 1) * n_train_batches + minibatch_index if iter % 100 == 0: print 'training @ iter = ', iter cost_ij = train_model(minibatch_index) if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [validate_model(i) for i in xrange(n_valid_batches)] #for index in range(0, n_valid_batches): # print output_model(index) # print valid_set_y[index * batch_size: (index + 1) * batch_size].eval() this_validation_loss = numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * \ improvement_threshold: patience = max(patience, iter * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter best_params = [copy.deepcopy(layer0.params), copy.deepcopy(layer1.params), copy.deepcopy(layer2.params)] # test it on the test set test_losses = [ test_model(i) for i in xrange(n_test_batches) ] #print params[0].eval() #print (params[0].eval() == layer2.params[0].eval()) #print validation_losses for index in range(0, n_valid_batches): for i in range(0, batch_size): true_i = batch_size*index+i #print output_model(index) print validsentence[true_i], '\t',senselist[output_model(index)[0][i]], '\t', senselist[valid_set_y[true_i].eval()] #print test_losses test_score = numpy.mean(test_losses) for index in range(0, n_test_batches): for i in range(0, batch_size): true_i = batch_size*index+i #print output_model(index) print testsentence[true_i], '\t',senselist[output_test(index)[0][i]], '\t', senselist[test_set_y[true_i].eval()] print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) if patience <= iter: done_looping = True break end_time = time.clock() print('Optimization complete.') for index in range(0, n_test_batches): for i in range(0, batch_size): true_i = batch_size*index+i #print output_model(index) print testsentence[true_i], '\t',senselist[output_test(index)[0][i]], '\t', senselist[test_set_y[true_i].eval()] layer0.W = copy.deepcopy(best_params[0][0]) layer0.b = copy.deepcopy(best_params[0][1]) #layer0.params = [layer0.W, layer0.b] layer1.W = copy.deepcopy(best_params[1][0]) layer1.b = copy.deepcopy(best_params[1][1]) #layer1.params = [layer1.W, layer1.b] layer2.W = copy.deepcopy(best_params[2][0]) layer2.b = copy.deepcopy(best_params[2][1]) #layer2.params = [layer2.W, layer2.b] for index in range(0, n_test_batches): for i in range(0, batch_size): true_i = batch_size*index+i #print output_model(index) print 
testsentence[true_i], '\t',senselist[output_test(index)[0][i]], '\t', senselist[test_set_y[true_i].eval()] print('Best validation score of %f %% obtained at iteration %i, ' 'with test performance %f %%' % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.))
def test_GRBM_DBN(finetune_lr=0.2, pretraining_epochs=1, pretrain_lr=0.01, k=1, training_epochs=10, dataset='mnist.pkl.gz', batch_size=10, annealing_learning_rate=0.999): """ Demonstrates how to train and test a Deep Belief Network. This is demonstrated on MNIST. :type learning_rate: float :param learning_rate: learning rate used in the finetune stage :type pretraining_epochs: int :param pretraining_epochs: number of epoch to do pretraining :type pretrain_lr: float :param pretrain_lr: learning rate to be used during pre-training :type k: int :param k: number of Gibbs steps in CD/PCD :type training_epochs: int :param training_epochs: maximal number of iterations ot run the optimizer :type dataset: string :param dataset: path the the pickled dataset :type batch_size: int :param batch_size: the size of a minibatch """ datasets = load_data_grbm(dataset) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size # numpy random generator numpy_rng = numpy.random.RandomState(123) print '... building the model' # construct the Deep Belief Network x_skeleton = T.matrix('x') dbn = GRBM_DBN(numpy_rng=numpy_rng, n_ins=28 * 28, hidden_layers_sizes=[1000, 1000, 1000], n_outs=10, finetune_lr=finetune_lr, input=x_skeleton) ######################### # PRETRAINING THE MODEL # ######################### print '... getting the pretraining functions' pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x, batch_size=batch_size, k=k) # The following part is to get the value for testing if False: index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 2D vector of [float32] labels dbn = GRBM_DBN(numpy_rng=numpy_rng, n_ins=28 * 28, hidden_layers_sizes=[1000, 1000, 1000], n_outs=10) dbn.load('dbn_params.npy') #train_fn, validate_model, test_model = dbn.build_finetune_functions( # datasets=datasets, batch_size=batch_size, # learning_rate=finetune_lr) valid_score_i = theano.function([index], dbn.errors, givens={dbn.x: valid_set_x[index * batch_size: (index + 1) * batch_size], dbn.y: valid_set_y[index * batch_size: (index + 1) * batch_size]}) n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_valid_batches /= batch_size validation_losses = [valid_score_i(i) for i in xrange(n_valid_batches)] validation_losses = valid_score_i() this_validation_loss = numpy.mean(validation_losses) ## get the actual softmax layer temp = theano.function([index],dbn.logLayer.p_y_given_x, givens={dbn.x: valid_set_x[index * batch_size: (index + 1) * batch_size]}) temp_out = [temp(i) for i in xrange(n_valid_batches)] print '... 
pre-training the model' start_time = time.clock() ## Pre-train layer-wise for i in xrange(dbn.n_layers): start_time_temp = time.clock() if i==0: # for GRBM, the The learning rate needs to be about one or #two orders of magnitude smaller than when using #binary visible units and some of the failures reported in the # literature are probably due to using a pretrain_lr_new = pretrain_lr*0.1 else: pretrain_lr_new = pretrain_lr # go through pretraining epochs for epoch in xrange(pretraining_epochs): # go through the training set c = [] for batch_index in xrange(n_train_batches): c.append(pretraining_fns[i](index=batch_index, lr=pretrain_lr_new)) end_time_temp = time.clock() print 'Pre-training layer %i, epoch %d, cost %f ' % (i, epoch, numpy.mean(c)) + ' ran for %d sec' % ((end_time_temp - start_time_temp) ) end_time = time.clock() print >> sys.stderr, ('The pretraining code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) ######################## # FINETUNING THE MODEL # ######################## # get the training, validation and testing function for the model print '... getting the finetuning functions' train_fn, validate_model = dbn.build_finetune_functions( datasets=datasets, batch_size=batch_size, annealing_learning_rate=annealing_learning_rate) print '... finetunning the model' # early-stopping parameters patience = 4 * n_train_batches # look as this many examples regardless patience_increase = 2. # wait this much longer when a new best is # found improvement_threshold = 0.999 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_params = None best_validation_loss = numpy.inf test_score = 0. start_time = time.clock() done_looping = False epoch = 0 while (epoch < training_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): minibatch_avg_cost = train_fn(minibatch_index) iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: import warnings warnings.filterwarnings("ignore") validation_losses = validate_model() this_validation_loss = numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % \ (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if (this_validation_loss < best_validation_loss * improvement_threshold): patience = max(patience, iter * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) if patience <= iter: done_looping = True break end_time = time.clock() print(('Optimization complete with best validation score of %f %%,' 'with test performance %f %%') % (best_validation_loss * 100., test_score * 100.)) print >> sys.stderr, ('The fine tuning code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) print dbn.state_learning_rate.get_value()
def test_mlp(learning_rate=0.05, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000, split=0, batch_size=1, n_hidden=[100], rot=5, seuil=0.25): datasets = load_data(split) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size n_valid_batches = valid_set_x.get_value( borrow=True).shape[0] #/ batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] #/ batch_size ###################### # BUILD ACTUAL MODEL # ###################### print '... building the model' # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.matrix('y') # the labels are presented as 1D vector of # [int] labels rng = numpy.random.RandomState(1234) shp = train_set_x.get_value().shape[1] # construct the MLP class classifier = MLP(rng=rng, input=x, n_in=shp, n_hidden=n_hidden, n_out=shp) # start-snippet-4 # the cost we minimize during training is the negative log likelihood of # the model plus the regularization terms (L1 and L2); cost is expressed # here symbolically cost = (classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr) # end-snippet-4 # compiling a Theano function that computes the mistakes that are made # by the model on a minibatch pred_test = theano.function(inputs=[index], outputs=[classifier.y_pred, y], givens={ x: test_set_x[index:(index + 1)], y: test_set_y[index:(index + 1)] }) pred_train = theano.function(inputs=[index], outputs=[classifier.y_pred, y], givens={ x: train_set_x[index:(index + 1)], y: train_set_y[index:(index + 1)] }) pred_valid = theano.function(inputs=[index], outputs=[classifier.y_pred, y], givens={ x: valid_set_x[index:(index + 1)], y: valid_set_y[index:(index + 1)] }) def evaluation(fn, d, ens, epoch, seuil, plot): x = d.get_value() n_samples = x.shape[0] if plot: bigpic = [] acc = [] for i in xrange(n_samples): pred, true = fn(i) pred_mask = pred * (x[i] > 0) pred_out = (pred_mask >= seuil).astype(numpy.int) true_out = true.astype(numpy.int) acc += [jaccard(pred_out, true_out)] if plot: bigpic += [x[i], pred, pred_mask, pred_out, true_out] this_acc = numpy.mean(acc) std_acc = numpy.std(acc) print('epoch %i, %s error %f +- %f %%' % (epoch, ens, this_acc * 100., std_acc * 100.)) if plot: bigpic = numpy.vstack(bigpic) tile = tile_raster_images(bigpic, (311, 457), (n_samples // 4, 5 * 4), output_pixel_vals=True) Im.fromarray(tile).convert("RGB").save("images/" + ens + str(epoch) + ".png") return this_acc gparams = [T.grad(cost, param) for param in classifier.params] updates = [(param, param - learning_rate * gparam) for param, gparam in zip(classifier.params, gparams)] train_model = theano.function( inputs=[index], outputs=cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size] }) ############### # TRAIN MODEL # ############### n_training_samples = train_set_x.get_value().shape[0] print '... 
training over %i training samples' % n_training_samples # early-stopping parameters patience = 10000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 1 # 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_validation_loss = -numpy.inf best_iter = 0 test_score = 0. start_time = time.clock() epoch = 0 done_looping = False evaluation(pred_train, train_set_x, "train", epoch, seuil, True) print "training started..." while (epoch < n_epochs) and (not done_looping): rotate_data((train_set_x, train_set_y), rot) epoch = epoch + 1 minibatch_avg_cost = [] for minibatch_index in xrange(n_train_batches): minibatch_avg_cost += [train_model(minibatch_index)] # iteration number iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: print "mean avg cost over training :: ", numpy.mean( minibatch_avg_cost) evaluation(pred_train, train_set_x, "train", epoch, seuil, True) val = evaluation(pred_valid, valid_set_x, "valid", epoch, seuil, True) # if we got the best validation score until now if val > best_validation_loss: #improve patience if loss improvement is good enough if (val > best_validation_loss * improvement_threshold): patience = max(patience, iter * patience_increase) best_validation_loss = val best_iter = iter evaluation(pred_test, test_set_x, "test", epoch, seuil, True) if patience <= iter: done_looping = True break end_time = time.clock() print(('Optimization complete. Best validation score of %f %% ' 'obtained at iteration %i, with test performance %f %%') % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.))
def __init__(self, rng, filter_shape, image_shape, poolsize=2, xin=None): assert image_shape[1] == filter_shape[1] self.image_shape=theano.shared( value=np.asarray(image_shape,dtype='int16'),borrow=True) self.poolsize=(poolsize,poolsize) #self.input = input if xin: self.x=xin else: self.x = T.matrix(name='input') self.x1=self.x.reshape(self.image_shape,ndim=4) self.filter_shape=filter_shape # there are "num input feature maps * filter height * filter width" # inputs to each hidden unit fan_in = np.prod(filter_shape[1:]) # each unit in the lower layer receives a gradient from: # "num output feature maps * filter height * filter width" / # pooling size fan_out = (filter_shape[0] * np.prod(filter_shape[2:]) / np.prod(self.poolsize)) # initialize weights with random weights W_bound = np.sqrt(6. / (fan_in + fan_out)) self.W = theano.shared( np.asarray( rng.uniform(low=-W_bound, high=W_bound, size=filter_shape), dtype=theano.config.floatX ), borrow=True ) self.W_prime=self.W[:,:,::-1,::-1] self.W_prime=self.W_prime.dimshuffle(1,0,2,3) #self.W_prime=self.W_prime[:,::-1] #print self.W.get_value() #print self.W_prime.eval() # the bias is a 1D tensor -- one bias per output feature map b_values = np.zeros((filter_shape[0],), dtype=theano.config.floatX) bp_values = np.zeros((filter_shape[1],), dtype=theano.config.floatX) self.b = theano.shared(value=b_values, borrow=True) self.b_prime = theano.shared(value=bp_values, borrow=True) if poolsize<-1: self.x1=self.x1.repeat(int(-poolsize), axis=2).repeat(int(-poolsize), axis=3) # convolve input feature maps with filters conv_out = conv2d( input=self.x1, filters=self.W, filter_shape=filter_shape, #image_shape=self.image_shape.eval(), border_mode='full' ) bp=(filter_shape[2]-1)/2 conv_out=conv_out[:,:,bp:-bp,bp:-bp] # downsample each feature map individually, using maxpooling if poolsize>1: try: self.pooled_out = pool.pool_2d( input=conv_out, ws=self.poolsize, ignore_border=True ) except: self.pooled_out = pool.pool_2d( input=conv_out, ds=self.poolsize, ignore_border=True ) else: self.pooled_out=conv_out self.hidden = T.maximum(0,(self.pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))) # store parameters of this layer self.params = [self.W,self.b]
def __init__(self, numpy_rng, theano_rng=None, n_ins=784, hidden_layers_sizes=[500, 500], n_outs=10, finetune_lr=0.1, input_x=None, label=None): self.sigmoid_layers = [] self.rbm_layers = [] self.params = [] self.n_layers = len(hidden_layers_sizes) # wudi add the mean and standard deviation of the activation values to exam the neural net # Reference: Understanding the difficulty of training deep feedforward neural networks, Xavier Glorot, Yoshua Bengio self.out_mean = [] self.out_std = [] assert self.n_layers > 0 if not theano_rng: theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) # allocate symbolic variables for the data if input_x is None: self.x = T.matrix('x') # the data is presented as rasterized images else: self.x = input_x if label is None: self.y = T.ivector('y') # the labels are presented as 1D vector # of [int] labels else: self.y = label for i in xrange(self.n_layers): if i == 0: input_size = n_ins layer_input = self.x else: input_size = hidden_layers_sizes[i - 1] layer_input = self.sigmoid_layers[-1].output sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input, n_in=input_size, n_out=hidden_layers_sizes[i], activation=T.nnet.sigmoid) # add the layer to our list of layers self.sigmoid_layers.append(sigmoid_layer) self.out_mean.append(T.mean(sigmoid_layer.output)) self.out_std.append(T.std(sigmoid_layer.output)) self.params.extend(sigmoid_layer.params) # Construct an RBM that shared weights with this layer if i == 0: rbm_layer = GBRBM(input=layer_input, n_in=input_size, n_hidden=hidden_layers_sizes[i], \ W=None, hbias=None, vbias=None, numpy_rng=None, transpose=False, activation=T.nnet.sigmoid, theano_rng=None, name='grbm', W_r=None, dropout=0, dropconnect=0) else: rbm_layer = RBM(numpy_rng=numpy_rng, theano_rng=theano_rng, input=layer_input, n_visible=input_size, n_hidden=hidden_layers_sizes[i], W=sigmoid_layer.W, hbias=sigmoid_layer.b) self.rbm_layers.append(rbm_layer) # We now need to add a logistic layer on top of the MLP self.logLayer = LogisticRegression( input=self.sigmoid_layers[-1].output, n_in=hidden_layers_sizes[-1], n_out=n_outs) self.params.extend(self.logLayer.params) # compute the cost for second phase of training, defined as the # negative log likelihood of the logistic regression (output) layer self.finetune_cost = self.logLayer.negative_log_likelihood(self.y) # compute the gradients with respect to the model parameters # symbolic variable that points to the number of errors made on the # minibatch given by self.x and self.y self.errors = self.logLayer.errors(self.y) ################################################# # Wudi change the annealing learning rate: ################################################# self.state_learning_rate = theano.shared(numpy.asarray(finetune_lr, dtype=theano.config.floatX), borrow=True)
def __init__( self, input_shape, output_dim, prob_network=None, hidden_sizes=(32, 32), hidden_nonlinearity=NL.rectify, optimizer=None, use_trust_region=True, step_size=0.01, normalize_inputs=True, name=None, ): """ :param input_shape: Shape of the input data. :param output_dim: Dimension of output. :param hidden_sizes: Number of hidden units of each layer of the mean network. :param hidden_nonlinearity: Non-linearity used for each layer of the mean network. :param optimizer: Optimizer for minimizing the negative log-likelihood. :param use_trust_region: Whether to use trust region constraint. :param step_size: KL divergence constraint for each iteration """ Serializable.quick_init(self, locals()) if optimizer is None: if use_trust_region: optimizer = PenaltyLbfgsOptimizer() else: optimizer = LbfgsOptimizer() self.output_dim = output_dim self._optimizer = optimizer if prob_network is None: prob_network = MLP( input_shape=input_shape, output_dim=output_dim, hidden_sizes=hidden_sizes, hidden_nonlinearity=hidden_nonlinearity, output_nonlinearity=NL.softmax, ) l_prob = prob_network.output_layer LasagnePowered.__init__(self, [l_prob]) xs_var = prob_network.input_layer.input_var ys_var = TT.imatrix("ys") old_prob_var = TT.matrix("old_prob") x_mean_var = theano.shared( np.zeros((1,) + input_shape), name="x_mean", broadcastable=(True,) + (False, ) * len(input_shape) ) x_std_var = theano.shared( np.ones((1,) + input_shape), name="x_std", broadcastable=(True,) + (False, ) * len(input_shape) ) normalized_xs_var = (xs_var - x_mean_var) / x_std_var prob_var = L.get_output(l_prob, {prob_network.input_layer: normalized_xs_var}) old_info_vars = dict(prob=old_prob_var) info_vars = dict(prob=prob_var) dist = self._dist = Categorical() mean_kl = TT.mean(dist.kl_sym(old_info_vars, info_vars)) loss = - TT.mean(dist.log_likelihood_sym(ys_var, info_vars)) predicted = special.to_onehot_sym(TT.argmax(prob_var, axis=1), output_dim) self._f_predict = ext.compile_function([xs_var], predicted) self._f_prob = ext.compile_function([xs_var], prob_var) self._l_prob = l_prob optimizer_args = dict( loss=loss, target=self, network_outputs=[prob_var], ) if use_trust_region: optimizer_args["leq_constraint"] = (mean_kl, step_size) optimizer_args["inputs"] = [xs_var, ys_var, old_prob_var] else: optimizer_args["inputs"] = [xs_var, ys_var] self._optimizer.update_opt(**optimizer_args) self._use_trust_region = use_trust_region self._name = name self._normalize_inputs = normalize_inputs self._x_mean_var = x_mean_var self._x_std_var = x_std_var
def createGradientFunctions(self): # Create the Theano variables x_arg1 = T.matrix('x_arg1', dtype='float32') x_arg2 = T.matrix('x_arg2', dtype='float32') true_class = T.matrix('true_class', dtype='float32') eps = T.matrix("eps", dtype='float32') Arg1_W1, Arg1_W2, Arg1_W3, Arg1_W4, Arg1_W5, \ Arg2_W2, Arg2_W3, Arg2_W4, \ Arg1_W2_prior, Arg1_W3_prior, Arg2_W2_prior, Arg2_W3_prior, \ Arg1_b1, Arg1_b4, Arg1_b5, Arg2_b4, b2, b3, \ L_Wc, L_bc, Label_W1, Label_W2, Label_W3, \ L_W4, Label_b1, L_b4, b2_prior, b3_prior, L_W6, L_b6, L_W7, L_b7, L_W8, L_b8 = self.tparams # Parameter Tying Arg2_W1 = Arg1_W1 Arg2_b1 = Arg1_b1 Arg2_W5 = Arg1_W5 Arg2_b5 = Arg1_b5 # Neural Inferencer h_arg1_encoder = T.tanh(T.dot(Arg1_W1,x_arg1) + Arg1_b1.dimshuffle(0, 'x')) h_arg2_encoder = T.tanh(T.dot(Arg2_W1,x_arg2) + Arg2_b1.dimshuffle(0, 'x')) l_encoder = T.tanh(T.dot(Label_W1,true_class) + Label_b1.dimshuffle(0, 'x')) mu_poster_encoder = T.dot(Arg1_W2,h_arg1_encoder) + T.dot(Arg2_W2,h_arg2_encoder) \ + T.dot(Label_W2,l_encoder) + b2.dimshuffle(0, 'x') log_sigma_poster_encoder = \ np.float32(0.5)*(T.dot(Arg1_W3,h_arg1_encoder) + T.dot(Arg2_W3,h_arg2_encoder) \ + T.dot(Label_W3,l_encoder) + b3.dimshuffle(0, 'x')) mu_prior_encoder = T.dot(Arg1_W2_prior,h_arg1_encoder) + T.dot(Arg2_W2_prior,h_arg2_encoder) \ + b2_prior.dimshuffle(0, 'x') log_sigma_prior_encoder = \ np.float32(0.5)*(T.dot(Arg1_W3_prior,h_arg1_encoder) + T.dot(Arg2_W3_prior,h_arg2_encoder) \ + b3_prior.dimshuffle(0, 'x')) #Find the hidden variable z z = mu_poster_encoder + T.exp(log_sigma_poster_encoder)*eps prior = T.sum((log_sigma_prior_encoder - log_sigma_poster_encoder) + \ (T.exp(log_sigma_poster_encoder)**np.float32(2) + \ (mu_poster_encoder - mu_prior_encoder)**np.float32(2)) / (np.float32(2)*(T.exp(log_sigma_prior_encoder)**np.float32(2))) - np.float32(0.5)) #Neural Generator h_arg1_decoder = T.tanh(T.dot(Arg1_W4,z) + Arg1_b4.dimshuffle(0, 'x')) h_arg2_decoder = T.tanh(T.dot(Arg2_W4,z) + Arg2_b4.dimshuffle(0, 'x')) y_arg1 = T.nnet.sigmoid(T.dot(Arg1_W5,h_arg1_decoder) + Arg1_b5.dimshuffle(0, 'x')) y_arg2 = T.nnet.sigmoid(T.dot(Arg2_W5,h_arg2_decoder) + Arg2_b5.dimshuffle(0, 'x')) logpxz = -(T.nnet.binary_crossentropy(y_arg1,x_arg1).sum() \ + T.nnet.binary_crossentropy(y_arg2,x_arg2).sum()) l_decoder = T.tanh(T.dot(L_W4,z) + L_b4.dimshuffle(0, 'x')) l_pred_decoder = T.tanh(T.dot(L_W4, mu_prior_encoder) + L_b4.dimshuffle(0, 'x')) l_decoder = T.tanh(T.dot(L_W6,l_decoder) + L_b6.dimshuffle(0, 'x')) l_pred_decoder = T.tanh(T.dot(L_W6,l_pred_decoder) + L_b6.dimshuffle(0, 'x')) l_decoder = T.tanh(T.dot(L_W7,l_decoder) + L_b7.dimshuffle(0, 'x')) l_pred_decoder = T.tanh(T.dot(L_W7,l_pred_decoder) + L_b7.dimshuffle(0, 'x')) l_decoder = T.tanh(T.dot(L_W8,l_decoder) + L_b8.dimshuffle(0, 'x')) l_pred_decoder = T.tanh(T.dot(L_W8,l_pred_decoder) + L_b8.dimshuffle(0, 'x')) pred_class = T.nnet.softmax(T.dot(L_Wc,l_decoder) + L_bc.dimshuffle(0, 'x')) logpc = -(T.nnet.categorical_crossentropy(pred_class,true_class).sum()) pred_level = T.nnet.softmax(T.dot(L_Wc,l_pred_decoder) + L_bc.dimshuffle(0, 'x')) logp = - logpxz - logpc + prior #Compute all the gradients derivatives = T.grad(logp,wrt=self.tparams) # apply gradient clipping here if self.clip_c > 0.: g2 = 0. 
for g in derivatives: g2 += (g**2).sum() new_grads = [] for g in derivatives: new_grads.append(T.switch(g2 > (self.clip_c**2), g / T.sqrt(g2) * self.clip_c, g)) derivatives = new_grads #Add the lowerbound so we can keep track of results derivatives.append(logp) self.gradientfunction = theano.function([x_arg1,x_arg2,true_class,eps], \ derivatives, on_unused_input='ignore') self.lowerboundfunction = theano.function([x_arg1,x_arg2,true_class,eps], \ logp, on_unused_input='ignore') self.predictionfunction = theano.function([x_arg1,x_arg2], \ pred_level.T, on_unused_input='ignore') #Adam Optimizer # This code is adapted from https://github.com/nyu-dl/dl4mt-tutorial/blob/master/session2/nmt.py def adam(lr, tparams, grads, inp, cost, beta1=0.9, beta2=0.999, e=1e-8): gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k) for k, p in zip(self.params_names, tparams)] gsup = [(gs, g) for gs, g in zip(gshared, grads)] f_grad_shared = theano.function(inp, cost, updates=gsup, profile=False) updates = [] t_prev = theano.shared(np.float32(0.)) t = t_prev + 1. lr_t = lr * T.sqrt(1. - beta2**t) / (1. - beta1**t) for p, g in zip(tparams, gshared): m = theano.shared(p.get_value() * 0., p.name + '_mean') v = theano.shared(p.get_value() * 0., p.name + '_variance') m_t = beta1 * m + (1. - beta1) * g v_t = beta2 * v + (1. - beta2) * g**2 step = lr_t * m_t / (T.sqrt(v_t) + e) p_t = p - step updates.append((m, m_t)) updates.append((v, v_t)) updates.append((p, p_t)) updates.append((t_prev, t)) f_update = theano.function([lr], [], updates=updates, on_unused_input='ignore', profile=False) return f_grad_shared, f_update lr = T.scalar(name='lr') self.f_grad_shared, self.f_update = \ adam(lr, self.tparams, derivatives[:-1], [x_arg1,x_arg2,true_class,eps], logp)
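# Hedged usage sketch (an assumption, not part of the original class): once
# createGradientFunctions() has compiled f_grad_shared / f_update, a single
# training step could look roughly like this.  `arg1`, `arg2` and `labels`
# are hypothetical float32 minibatches laid out as (features, batch), and
# `z_dim` is the assumed latent dimensionality used to draw eps.
import numpy as np

def hypothetical_train_step(model, arg1, arg2, labels, z_dim, lr=1e-3):
    eps = np.random.standard_normal((z_dim, arg1.shape[1])).astype('float32')
    cost = model.f_grad_shared(arg1, arg2, labels, eps)  # accumulate gradients, return the bound
    model.f_update(np.float32(lr))                       # apply the Adam update
    return cost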
def make_node(self, _x):
    x = as_tensor_variable(_x)
    if x.type.ndim != 1:
        raise TypeError('AllocDiag only works on vectors', _x)
    return Apply(self, [x], [tensor.matrix(dtype=x.type.dtype)])
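# Illustrative sketch only (not the op's actual implementation): AllocDiag
# maps a vector to the square matrix holding that vector on its diagonal,
# which is what a NumPy reference for its perform() step could look like.
import numpy as np

def alloc_diag_reference(v):
    v = np.asarray(v)
    assert v.ndim == 1, 'AllocDiag only works on vectors'
    return np.diag(v)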
def main(): usage="""Segment a tomograph using convolutional neural network. Please run this program from the GUI in e2projectmanager.py.""" #print usage parser = EMArgumentParser(usage=usage,version=EMANVERSION) #parser.add_header(name="tmpheader", help='temp label', title="### This program is NOT avaliable yet... ###", row=0, col=0, rowspan=1, colspan=2, mode="train,test") parser.add_argument("--trainset",help="Training set.", default=None, guitype='filebox', browser="EMParticlesTable(withmodal=True)", row=1, col=0,rowspan=1, colspan=3, mode="train") parser.add_argument("--from_trained", type=str,help="Start from pre-trained neural network", default=None,guitype='filebox',browser="EMBrowserWidget(withmodal=True)", row=2, col=0, rowspan=1, colspan=3, mode="train,test") parser.add_argument("--netout", type=str,help="Output neural net file name", default="nnet_save.hdf",guitype='strbox', row=3, col=0, rowspan=1, colspan=3, mode="train") parser.add_argument("--learnrate", type=float,help="Learning rate ", default=.01, guitype='floatbox', row=4, col=0, rowspan=1, colspan=1, mode="train") parser.add_argument("--niter", type=int,help="Training iterations", default=20, guitype='intbox', row=4, col=1, rowspan=1, colspan=1, mode="train") parser.add_argument("--ncopy", type=int,help="Number of copies for each particle", default=1, guitype='intbox', row=5, col=0, rowspan=1, colspan=1, mode="train") parser.add_argument("--batch", type=int,help="Batch size for the stochastic gradient descent. Default is 20.", default=20, guitype='intbox', row=5, col=1, rowspan=1, colspan=1, mode="train") parser.add_argument("--nkernel", type=str,help="Number of kernels for each layer, from input to output. The number of kernels in the last layer must be 1. ", default="40,40,1", guitype='strbox', row=6, col=0, rowspan=1, colspan=1, mode="train") parser.add_argument("--ksize", type=str,help="Width of kernels of each layer, the numbers must be odd. Note the number of layers should be the same as the nkernel option. ", default="15,15,15", guitype='strbox', row=6, col=1, rowspan=1, colspan=1, mode="train") parser.add_argument("--poolsz", type=str,help="Pooling size for each layer. Note the number of layers should be the same as the nkernel option. ", default="2,1,1", guitype='strbox', row=7, col=0, rowspan=1, colspan=1, mode="train") parser.add_argument("--weightdecay", type=float,help="Weight decay. 
Used for regularization.", default=1e-6, guitype='floatbox', row=7, col=1, rowspan=1, colspan=1, mode="train") parser.add_argument("--trainout", action="store_true", default=False ,help="Output the result of the training set", guitype='boolbox', row=8, col=0, rowspan=1, colspan=1, mode='train[True]') parser.add_argument("--training", action="store_true", default=False ,help="Doing training", guitype='boolbox', row=8, col=1, rowspan=1, colspan=1, mode='train[True]') parser.add_argument("--tomograms", type=str,help="Tomograms input.", default=None,guitype='filebox',browser="EMBrowserWidget(withmodal=True)", row=1, col=0, rowspan=1, colspan=3, mode="test") parser.add_argument("--applying", action="store_true", default=False ,help="Applying the neural network on tomograms", guitype='boolbox', row=4, col=0, rowspan=1, colspan=1, mode='test[True]') parser.add_argument("--dream", action="store_true", default=False ,help="Iterativly applying the neural network on noise") parser.add_argument("--to3d", action="store_true", default=True ,help="convert to result to 3D.", guitype='boolbox', row=5, col=1, rowspan=1, colspan=1, mode='test') parser.add_argument("--output", type=str,help="Segmentation out file name", default="tomosegresult.hdf", guitype='strbox', row=3, col=0, rowspan=1, colspan=1, mode="test") parser.add_argument("--threads", type=int,help="Number of thread to use when applying neural net on test images. Not used during trainning", default=12, guitype='intbox', row=10, col=0, rowspan=1, colspan=1, mode="test") parser.add_argument("--ppid", type=int, help="Set the PID of the parent process, used for cross platform PPID",default=-1) (options, args) = parser.parse_args() E2n=E2init(sys.argv,options.ppid) #### parse the options. options.nkernel=[int(i) for i in options.nkernel.split(',')] options.ksize=[int(i) for i in options.ksize.split(',')] if options.poolsz: options.poolsz=[int(i) for i in options.poolsz.split(',')] #### This is supposed to test the overfitting of the network by applying it on pure noise repeatly #### The function is no longer maintained so it may or may not work.. if options.dream: print("This function is no longer supported.. exit.") return #os.environ["THEANO_FLAGS"]="optimizer=None" #print "Testing on big images, Theano optimizer disabled" #import_theano() #convnet=load_model(options.from_trained) #dream(convnet,options) #E2end(E2n) #exit() if options.applying: apply_neuralnet(options) E2end(E2n) exit() os.environ["THEANO_FLAGS"]="optimizer=fast_run" import_theano() batch_size=options.batch #### Train da with particles first. 
if options.trainset==None: print("No training set input...exit.") exit() rng = np.random.RandomState(123) labelshrink=np.prod(options.poolsz) print("loading particles...") particles=load_particles(options.trainset,labelshrink,options.ncopy, rng) train_set_x= particles[0] labels=particles[1] shape=particles[2] ntrain=particles[3] #print "Number of particles: {}".format(train_set_x.shape.eval()[0]) # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images image_shape=(batch_size, shape[2], shape[0],shape[1]) if options.from_trained!=None: convnet=load_model(options.from_trained) convnet.update_shape(image_shape) else: print("setting up model") convnet = StackedConvNet( rng, nkernel=options.nkernel, ksize=options.ksize, poolsz=options.poolsz, imageshape=image_shape ) #print shape if (options.niter>0): print("training the convolutional network...") classify=convnet.get_classify_func(train_set_x,labels,batch_size) learning_rate=options.learnrate n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size v0=np.inf nbad=0 for epoch in xrange(options.niter): # go through the training set c = [] #### train set loss v = [] #### valid set loss if epoch==0: print(classify(0,lr=learning_rate,wd=options.weightdecay)) for batch_index in xrange(n_train_batches): if batch_index*batch_size < ntrain: err=classify(batch_index, lr=learning_rate, wd=options.weightdecay) c.append(err) if epoch==0 and batch_index<5: print(err) else: err=classify(batch_index, lr=0, wd=options.weightdecay) v.append(err) #print len(v), len(c) learning_rate*=.9 print("Training epoch {:d}, train loss {:.3f}, learning rate {:.3f}".format(epoch, np.mean(c), learning_rate), end=' ') if len(v)>0: print("valid loss {:.3f}".format(np.mean(v)), end=' ') if np.mean(v)>v0 and np.mean(v)>np.mean(c): nbad+=1 print('*') else: nbad=0 print() v0=np.mean(v) if nbad>2: print("loss increase in validation set. Overfitting. 
Stop.") break else: print() ####################################### #print convnet.clslayer.W.get_value() #print convnet.clslayer.b.get_value() if options.trainout: print("Generating results ...") nsample=100 convnet.update_shape((nsample, shape[2], shape[0],shape[1])) test_cls = theano.function( inputs=[], outputs=convnet.clslayer.get_image(False), givens={ convnet.x: train_set_x[:nsample] } ) if options.netout.endswith(".hdf"): fname="trainout_{}".format(options.netout) else: fname="trainout_{}.hdf".format(options.netout) try:os.remove(fname) except: pass #print convnet.outsize,shape mid=test_cls() ipt= train_set_x[:nsample] ipt= ipt.eval() lb= labels[:nsample].eval() amp=[] for t in range(nsample): #### raw image if shape[2]==1: img=ipt[t].reshape(shape[0],shape[1]) else: img=ipt[t].reshape(shape[2],shape[0],shape[1]) img=img[shape[2]/2] e0 = from_numpy(img.astype("float32")) e0.write_image(fname,-1) #### manual annotation img=lb[t].reshape(convnet.outsize,convnet.outsize) e1 = from_numpy(img.astype("float32")) e1=e1.get_clip(Region((convnet.outsize-shape[0])/2,(convnet.outsize-shape[0])/2,shape[0],shape[0])) e1.scale(float(shape[0])/float(convnet.outsize)) e1.process_inplace("threshold.binary", {"value":.67}) e1.write_image(fname,-1) #### neural net output img=mid[t].reshape(convnet.outsize,convnet.outsize) e2 = from_numpy(img.astype("float32")) e2=e2.get_clip(Region((convnet.outsize-shape[0])/2,(convnet.outsize-shape[0])/2,shape[0],shape[0])) #print float(shape[0])/float(convnet.outsize) e2.scale(float(shape[0])/float(convnet.outsize)) e2.write_image(fname,-1) #### measure the amplitude of the neural network output by comparing it to the label e2.mult(e1) amp.append(e2["mean_nonzero"]) print("amplitude: ", np.mean(amp)) convnet.amplitude=np.mean(amp) print("Writing output on training set in {}".format(fname)) save_model(convnet, options.netout, options) print("Done") E2end(E2n)
def build_model(tparams, model_options):
    x = T.matrix('x', dtype='float32')
    start_temperature = T.scalar('start_temperature', dtype='float32')
    num_step = T.scalar('num_step', dtype='int32')
    loss = compute_loss(x, model_options, tparams, start_temperature, num_step)
    return x, loss, start_temperature, num_step
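# Hedged usage sketch (an assumption, not part of the original file): the
# symbolic variables returned by build_model can be compiled into a callable
# loss for a quick sanity check, given tparams/model_options built elsewhere.
def compile_loss(tparams, model_options):
    x, loss, start_temperature, num_step = build_model(tparams, model_options)
    return theano.function([x, start_temperature, num_step], loss)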
def __init__(self, N_tot, D, Q, Domain_number, Ydim, Hiddenlayerdim1, Hiddenlayerdim2, num_MC, n_rff):
    ########################################
    # set up the symbolic inputs for X; turn this into a layer later as well
    self.Xlabel = T.matrix('Xlabel')
    self.X = T.matrix('X')
    self.Y = T.matrix('Y')
    N = self.X.shape[0]
    self.Weight = T.matrix('Weight')
    ########################
    # build the hidden layer
    self.Data_input = T.tile(self.X, (num_MC, 1, 1))
    ##########################################
    #### inference on the X side
    self.RFF_X = RFFLayer(rng, self.Data_input, n_in=D, n_out=Q, num_MC=num_MC, num_FF=n_rff,
                          Domain_number=Domain_number, number="X", Domain_consideration=True)
    self.params = self.RFF_X.all_params
    self.hyp_params = self.RFF_X.hyp_params
    self.variational_params = self.RFF_X.variational_params
    ##############################################################################################
    ### computation on the Y side
    self.RFF_Y = RFFLayer(rng, self.RFF_X.output, n_in=Q, n_out=Ydim, num_MC=num_MC, num_FF=n_rff,
                          number="Y", Domain_consideration=False)
    self.params.extend(self.RFF_Y.all_params)
    self.hyp_params.append(self.RFF_Y.lhyp)
    self.variational_params.extend(self.RFF_Y.variational_params)
    ##########################################
    # store the parameters
    #self.no_updates=self.RFF_X.no_update
    self.wrt = {}
    for i in self.params:
        self.wrt[str(i)] = i
    ###########################################
    ### objective function
    ############# X side
    #self.LL_X = self.RFF_X.likelihood_domain(self.X,self.Xlabel)*N_tot/(N*num_MC)
    self.KL_WX = self.RFF_X.KL_W
    ############# Y side
    self.LL_Y = self.RFF_Y.liklihood_nodomain(self.Y) * N_tot / (N * num_MC)
    self.KL_WY = self.RFF_Y.KL_W
    #y=self.Gaussian_layer_Y.softmax_class()
    #self.LLY = -T.mean(T.nnet.categorical_crossentropy(y, self.Y))*N
    ############# latent layer and prediction
    #self.error = self.RFF_Y.error_RMSE(self.Y)
    pred = T.mean(self.RFF_Y.output, 0)
    self.error = (T.mean((self.Y - pred) ** 2, 0)) ** 0.5
    #mu=T.mean(target,0)
    #self.error= (T.mean(T.mean((self.Y[None,:,:] - self.RFF_Y.output)**2,0)))**0.5
    ###########################################
    self.MMD = self.RFF_Y.MMD_central_penalty(self.Xlabel) * N_tot
def evaluate_lenet5(learning_rate=0.1, n_epochs=200, nkerns=[48, 64, 96], batch_size=500):
    """ Demonstrates a LeNet-style convolutional network on a 50x50 image dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each convolutional layer
                   (the defaults give 48, 64 and 96 kernels for the three layers)

    :type batch_size: int
    :param batch_size: size of each minibatch
    """
    rng = numpy.random.RandomState(23455)

    # original data loading was: datasets = load_data(dataset)
    datasets = loadData()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size
    print(n_train_batches)
    print(n_valid_batches)
    if n_test_batches == 0:
        n_test_batches = 1

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    ishape = (50, 50)    # size of the input images

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape matrix of rasterized images of shape (batch_size, 50*50)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, 50, 50))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (50-5+1, 50-5+1) = (46, 46)
    # maxpooling reduces this further to (46/2, 46/2) = (23, 23)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 23, 23)
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                                image_shape=(batch_size, 1, 50, 50),
                                filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2))

    # Construct the second convolutional pooling layer:
    # filtering reduces the image size to (23-3+1, 23-3+1) = (21, 21)
    # maxpooling reduces this further to (21/2, 21/2) = (10, 10)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 10, 10)
    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 23, 23),
                                filter_shape=(nkerns[1], nkerns[0], 3, 3), poolsize=(2, 2))

    # Third convolutional pooling layer:
    # filtering reduces the image size to (10-3+1, 10-3+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    layer1_3 = LeNetConvPoolLayer(rng, input=layer1.output,
                                  image_shape=(batch_size, nkerns[1], 10, 10),
                                  filter_shape=(nkerns[2], nkerns[1], 3, 3), poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
# This will generate a matrix of shape (20,32*4*4) = (20,512) layer2_input = layer1_3.output.flatten(2) # construct a fully-connected sigmoidal layer layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[2] * 4 * 4, n_out=1000, activation=T.tanh) # classify the values of the fully-connected sigmoidal layer layer3 = LogisticRegression(input=layer2.output, n_in=1000, n_out=58) # the cost we minimize during training is the NLL of the model cost = layer3.negative_log_likelihood(y) # create a function to compute the mistakes that are made by the model test_model = theano.function([index], layer3.errors(y), givens={ x: test_set_x[index * batch_size: (index + 1) * batch_size], y: test_set_y[index * batch_size: (index + 1) * batch_size]}) validate_model = theano.function([index], layer3.errors(y), givens={ x: valid_set_x[index * batch_size: (index + 1) * batch_size], y: valid_set_y[index * batch_size: (index + 1) * batch_size]}) # create a list of all model parameters to be fit by gradient descent params = layer3.params + layer2.params + layer1_3.params + layer1.params + layer0.params # create a list of gradients for all model parameters grads = T.grad(cost, params) # train_model is a function that updates the model parameters by # SGD Since this model has many parameters, it would be tedious to # manually create an update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i],grads[i]) pairs. updates = [] for param_i, grad_i in zip(params, grads): updates.append((param_i, param_i - learning_rate * grad_i)) train_model = theano.function([index], cost, updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size]}) ############### # TRAIN MODEL # ############### print '... training' # early-stopping parameters patience = 1000000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_params = None best_validation_loss = numpy.inf best_iter = 0 test_score = 0. 
start_time = time.clock() epoch = 0 done_looping = False while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): iter = (epoch - 1) * n_train_batches + minibatch_index if iter % 100 == 0: print 'training @ iter = ', iter cost_ij = train_model(minibatch_index) if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [validate_model(i) for i in xrange(n_valid_batches)] this_validation_loss = numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % \ (epoch, minibatch_index + 1, n_train_batches, \ this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * \ improvement_threshold: patience = max(patience, iter * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter # test it on the test set test_losses = [test_model(i) for i in xrange(n_test_batches)] test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of best ' 'model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) best_params = params if patience <= iter: done_looping = True break end_time = time.clock() print('Optimization complete.') print('Best validation score of %f %% obtained at iteration %i,'\ 'with test performance %f %%' % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) saveParams(params)
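# Hedged driver (an assumption, not in the original script): the training
# entry point above can be invoked directly with its default hyper-parameters.
if __name__ == '__main__':
    evaluate_lenet5(learning_rate=0.1, n_epochs=200, nkerns=[48, 64, 96], batch_size=500)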
def train(args, model_args): #model_id = '/data/lisatmp4/lambalex/lsun_walkback/walkback_' model_id = '/data/lisatmp4/anirudhg/cifar_walk_back/walkback_' model_dir = create_log_dir(args, model_id) model_id2 = 'logs/walkback_' model_dir2 = create_log_dir(args, model_id2) print model_dir print model_dir2 + '/' + 'log.jsonl.gz' logger = mimir.Logger(filename=model_dir2 + '/log.jsonl.gz', formatter=None) # TODO batches_per_epoch should not be hard coded lrate = args.lr import sys sys.setrecursionlimit(10000000) args, model_args = parse_args() #trng = RandomStreams(1234) if args.resume_file is not None: print "Resuming training from " + args.resume_file from blocks.scripts import continue_training continue_training(args.resume_file) ## load the training data if args.dataset == 'MNIST': print 'loading MNIST' from fuel.datasets import MNIST dataset_train = MNIST(['train'], sources=('features',)) dataset_test = MNIST(['test'], sources=('features',)) n_colors = 1 spatial_width = 28 elif args.dataset == 'CIFAR10': from fuel.datasets import CIFAR10 dataset_train = CIFAR10(['train'], sources=('features',)) dataset_test = CIFAR10(['test'], sources=('features',)) n_colors = 3 spatial_width = 32 elif args.dataset == "lsun" or args.dataset == "lsunsmall": print "loading lsun class!" from load_lsun import load_lsun print "loading lsun data!" if args.dataset == "lsunsmall": dataset_train, dataset_test = load_lsun(args.batch_size, downsample=True) spatial_width=32 else: dataset_train, dataset_test = load_lsun(args.batch_size, downsample=False) spatial_width=64 n_colors = 3 elif args.dataset == "celeba": print "loading celeba data" from fuel.datasets.celeba import CelebA dataset_train = CelebA(which_sets = ['train'], which_format="64", sources=('features',), load_in_memory=False) dataset_test = CelebA(which_sets = ['test'], which_format="64", sources=('features',), load_in_memory=False) spatial_width = 64 n_colors = 3 tr_scheme = SequentialScheme(examples=dataset_train.num_examples, batch_size=args.batch_size) ts_scheme = SequentialScheme(examples=dataset_test.num_examples, batch_size=args.batch_size) train_stream = DataStream.default_stream(dataset_train, iteration_scheme = tr_scheme) test_stream = DataStream.default_stream(dataset_test, iteration_scheme = ts_scheme) dataset_train = train_stream dataset_test = test_stream #epoch_it = train_stream.get_epoch_iterator() elif args.dataset == 'Spiral': print 'loading SPIRAL' train_set = Spiral(num_examples=100000, classes=1, cycles=2., noise=0.01, sources=('features',)) dataset_train = DataStream.default_stream(train_set, iteration_scheme=ShuffledScheme( train_set.num_examples, args.batch_size)) else: raise ValueError("Unknown dataset %s."%args.dataset) model_options = locals().copy() if args.dataset != 'lsun' and args.dataset != 'celeba': train_stream = Flatten(DataStream.default_stream(dataset_train, iteration_scheme=ShuffledScheme( examples=dataset_train.num_examples - (dataset_train.num_examples%args.batch_size), batch_size=args.batch_size))) else: train_stream = dataset_train test_stream = dataset_test print "Width", WIDTH, spatial_width shp = next(train_stream.get_epoch_iterator())[0].shape print "got epoch iterator" Xbatch = next(train_stream.get_epoch_iterator())[0] scl = 1./np.sqrt(np.mean((Xbatch-np.mean(Xbatch))**2)) shft = -np.mean(Xbatch*scl) print 'Building model' params = init_params(model_options) if args.reload_: print "Trying to reload parameters" if os.path.exists(args.saveto_filename): print 'Reloading Parameters' print args.saveto_filename 
params = load_params(args.saveto_filename, params) tparams = init_tparams(params) print tparams x, cost, start_temperature, step_chain = build_model(tparams, model_options) inps = [x.astype('float32'), start_temperature, step_chain] x_Data = T.matrix('x_Data', dtype='float32') temperature = T.scalar('temperature', dtype='float32') step_chain_part = T.scalar('step_chain_part', dtype='int32') forward_diffusion = one_step_diffusion(x_Data, model_options, tparams, temperature, step_chain_part) print tparams grads = T.grad(cost, wrt=itemlist(tparams)) #get_grads = theano.function(inps, grads) for j in range(0, len(grads)): grads[j] = T.switch(T.isnan(grads[j]), T.zeros_like(grads[j]), grads[j]) # compile the optimizer, the actual computational graph is compiled here lr = T.scalar(name='lr') print 'Building optimizers...', optimizer = args.optimizer f_grad_shared, f_update = getattr(optimizers, optimizer)(lr, tparams, grads, inps, cost) print 'Done' #for param in tparams: # print param # print tparams[param].get_value().shape print 'Buiding Sampler....' f_sample = sample(tparams, model_options) print 'Done' uidx = 0 estop = False bad_counter = 0 max_epochs = 4000 batch_index = 1 print 'Number of steps....' print args.num_steps print "Number of metasteps...." print args.meta_steps print 'Done' count_sample = 1 for eidx in xrange(max_epochs): n_samples = 0 print 'Starting Next Epoch ', eidx for data in train_stream.get_epoch_iterator(): if args.dataset == 'CIFAR10': if data[0].shape[0] == args.batch_size: data_use = (data[0].reshape(args.batch_size,3*32*32),) else: continue t0 = time.time() batch_index += 1 n_samples += len(data_use[0]) uidx += 1 if data_use[0] is None: print 'No data ' uidx -= 1 continue ud_start = time.time() t1 = time.time() data_run = data_use[0] temperature_forward = args.temperature meta_cost = [] for meta_step in range(0, args.meta_steps): data_run = data_run.astype('float32') meta_cost.append(f_grad_shared(data_run, temperature_forward, meta_step)) f_update(lrate) if args.meta_steps > 1: data_run, sigma, _, _ = forward_diffusion(data_run, temperature_forward, meta_step) temperature_forward *= args.temperature_factor cost = sum(meta_cost) / len(meta_cost) ud = time.time() - ud_start #gradient_updates_ = get_grads(data_use[0],args.temperature) if np.isnan(cost) or np.isinf(cost): print 'NaN detected' return 1. 
logger.log({'epoch': eidx, 'batch_index': batch_index, 'uidx': uidx, 'training_error': cost}) if batch_index%20==0: print batch_index, "cost", cost if batch_index%1000==0: print 'saving params' params = unzip(tparams) save_params(params, model_dir + '/' + 'params_' + str(batch_index) + '.npz') if batch_index%200==0: count_sample += 1 ''' temperature = args.temperature * (args.temperature_factor ** (args.num_steps*args.meta_steps -1 )) temperature_forward = args.temperature for num_step in range(args.num_steps * args.meta_steps): print "Forward temperature", temperature_forward if num_step == 0: x_data, sampled, sampled_activation, sampled_preactivation = forward_diffusion(data[0].astype('float32'), temperature_forward, num_step) x_data = np.asarray(x_data).astype('float32').reshape(args.batch_size, INPUT_SIZE) x_temp = x_data.reshape(args.batch_size, n_colors, WIDTH, WIDTH) plot_images(x_temp, model_dir + '/' + "batch_" + str(batch_index) + '_corrupted' + 'epoch_' + str(count_sample) + '_time_step_' + str(num_step)) else: x_data, sampled, sampled_activation, sampled_preactivation = forward_diffusion(x_data.astype('float32'), temperature_forward, num_step) x_data = np.asarray(x_data).astype('float32').reshape(args.batch_size, INPUT_SIZE) x_temp = x_data.reshape(args.batch_size, n_colors, WIDTH, WIDTH) plot_images(x_temp, model_dir + '/batch_' + str(batch_index) + '_corrupted' + '_epoch_' + str(count_sample) + '_time_step_' + str(num_step)) temperature_forward = temperature_forward * args.temperature_factor; x_temp2 = data_use[0].reshape(args.batch_size, n_colors, WIDTH, WIDTH) plot_images(x_temp2, model_dir + '/' + 'orig_' + 'epoch_' + str(eidx) + '_batch_index_' + str(batch_index)) temperature = args.temperature * (args.temperature_factor ** (args.num_steps*args.meta_steps - 1 )) for i in range(args.num_steps*args.meta_steps + args.extra_steps): x_data, sampled, sampled_activation, sampled_preactivation = f_sample(x_data.astype('float32'), temperature, args.num_steps*args.meta_steps -i - 1) print 'On backward step number, using temperature', i, temperature reverse_time(scl, shft, x_data, model_dir + '/'+ "batch_" + str(batch_index) + '_samples_backward_' + 'epoch_' + str(count_sample) + '_time_step_' + str(i)) x_data = np.asarray(x_data).astype('float32') x_data = x_data.reshape(args.batch_size, INPUT_SIZE) if temperature == args.temperature: temperature = temperature else: temperature /= args.temperature_factor ''' if args.noise == "gaussian": x_sampled = np.random.normal(0.5, 2.0, size=(args.batch_size,INPUT_SIZE)).clip(0.0, 1.0) else: s = np.random.binomial(1, 0.5, INPUT_SIZE) temperature = args.temperature * (args.temperature_factor ** (args.num_steps*args.meta_steps - 1)) x_data = np.asarray(x_sampled).astype('float32') for i in range(args.num_steps*args.meta_steps + args.extra_steps): x_data, sampled, sampled_activation, sampled_preactivation = f_sample(x_data.astype('float32'), temperature, args.num_steps*args.meta_steps -i - 1) print 'On step number, using temperature', i, temperature reverse_time(scl, shft, x_data, model_dir + '/batch_index_' + str(batch_index) + '_inference_' + 'epoch_' + str(count_sample) + '_step_' + str(i)) x_data = np.asarray(x_data).astype('float32') x_data = x_data.reshape(args.batch_size, INPUT_SIZE) if temperature == args.temperature: temperature = temperature else: temperature /= args.temperature_factor ipdb.set_trace()
import theano
import theano.tensor as T
from load import mnist
import numpy as np
from costs import categorical_crossentropy
from updates import Adadelta


def floatX(arr):
    # cast helper; assumed to live in the repo's utilities, defined here so the
    # snippet is self-contained
    return np.asarray(arr, dtype=theano.config.floatX)


batch_size = 128

X = T.matrix()
Y = T.matrix()

n_in = 28 * 28
n_hidden = 512
n_out = 10

w_in = theano.shared(floatX(np.random.randn(n_in, n_hidden) * 0.01))
w_out = theano.shared(floatX(np.random.randn(n_hidden, n_out) * 0.01))
b_in = theano.shared(floatX(np.zeros(n_hidden)))
b_out = theano.shared(floatX(np.zeros(n_out)))


def model(X):
    # single hidden layer MLP: tanh hidden units, softmax output
    h = T.tanh(T.dot(X, w_in) + b_in)
    y = T.nnet.softmax(T.dot(h, w_out) + b_out)
    return y


out = model(X)
err = categorical_crossentropy(Y, out)
params = [w_in, b_in, w_out, b_out]
grads = T.grad(err, params)
updates = Adadelta(params, grads)

train = theano.function([X, Y], err, updates=updates)
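# Hedged continuation (an assumption, not in the original snippet): drive the
# compiled `train` function over minibatches.  Random placeholder arrays are
# used because the exact return format of `load.mnist` is not shown here.
trX = floatX(np.random.rand(1024, n_in))
trY = floatX(np.eye(n_out)[np.random.randint(0, n_out, 1024)])
for epoch in range(5):
    costs = []
    for start in range(0, len(trX), batch_size):
        costs.append(train(trX[start:start + batch_size],
                           trY[start:start + batch_size]))
    print(np.mean(costs))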
def main(): parser = build_parser() args = parser.parse_args() np.random.seed(args.seed) trng = RandomStreams(args.seed) rng = np.random.RandomState(args.seed + 1) model_file = args.model_prefix + "_pars.npz" model_opts = args.model_prefix + "_opts.pkl" model_options = pkl.load(open(model_opts, 'rb')) # Load data data = IMDB_JMARS("./experiments/data", seq_len=16, batch_size=args.nb_samples, topk=16000) model_options["dim_input"] = data.voc_size for num, (x, y, x_mask) in enumerate(data.get_valid_batch()): data.print_batch(x) break params = init_params(model_options) print('Loading model parameters...') params = load_params(model_file, params) tparams = init_tparams(params) x = T.lmatrix('x') y = T.lmatrix('y') x_mask = T.matrix('x_mask') # Debug test_value x.tag.test_value = np.random.rand(11, 20).astype("int64") y.tag.test_value = np.random.rand(11, 20).astype("int64") x_mask.tag.test_value = np.ones((11, 20)).astype("float32") is_train.tag.test_value = np.float32(0.) zmuv = T.tensor3('zmuv') zmuv.tag.test_value = np.ones( (11, 20, model_options['dim_z'])).astype("float32") # build the symbolic computational graph nll_rev, states_rev, updates_rev = \ build_rev_model(tparams, model_options, x, y, x_mask) nll_gen, states_gen, kld, rec_cost_rev, updates_gen, \ log_pxIz, log_pz, log_qzIx, z, _ = \ build_gen_model(tparams, model_options, x, y, x_mask, zmuv, states_rev) # Build sampler f_next = build_sampler(tparams, model_options, trng, provide_z=True) # Build inference get_latents = theano.function([x, y, x_mask, zmuv], z, updates=(updates_gen + updates_rev), givens={is_train: np.float32(0.)}) while True: s1 = raw_input("s1:").strip().split() s2 = raw_input("s2:").strip().split() s1_id = [data.word2idx.get(word, data.unk_id) for word in s1] s2_id = [data.word2idx.get(word, data.unk_id) for word in s2] batch = data.prepare_batch([s1_id, s2_id]) data.print_batch(batch[0]) zmuv = rng.normal(loc=0.0, scale=1.0, size=(batch[0].shape[1], 2, model_options['dim_z'])).astype('float32') batch_z = get_latents(batch[0].T, batch[1].T, batch[2].T, zmuv) z1 = batch_z[:, [0], :] z2 = batch_z[:, [1], :] print("Beam Search") data.print_batch(batch[0][[0]], eos_id=data.eos_id, print_number=False) for i in np.linspace(0, 1, 11): print("{}: ".format(i), end="") z = ((1 - i) * z1) + (i * z2) # Interpolate latent z = np.repeat(z, 10, axis=1) sample, sample_score = beam_sample(tparams, f_next, model_options, maxlen=20, zmuv=z, unk_id=data.unk_id, eos_id=data.eos_id, bos_id=data.bos_id) sample = [sample[0]] data.print_batch(sample, eos_id=data.eos_id, print_number=False) data.print_batch(batch[0][[1]], eos_id=data.eos_id, print_number=False) # Interpolation print("Samples") data.print_batch(batch[0][[0]], eos_id=data.eos_id, print_number=False) for i in np.linspace(0, 1, 11): print("{}: ".format(i), end="") z = ((1 - i) * z1) + (i * z2) # Interpolate latent z = np.repeat(z, 10, axis=1) sample, sample_score = gen_sample(tparams, f_next, model_options, maxlen=20, argmax=False, zmuv=z, unk_id=data.unk_id, eos_id=data.eos_id, bos_id=data.bos_id) sample = [sample.T[np.argsort(sample_score)[-1]]] data.print_batch(sample, eos_id=data.eos_id, print_number=False) data.print_batch(batch[0][[1]], eos_id=data.eos_id, print_number=False) print("Argmax") data.print_batch(batch[0][[0]], eos_id=data.eos_id, print_number=False) for i in np.linspace(0, 1, 11): print("{}: ".format(i), end="") z = ((1 - i) * z1) + (i * z2) # Interpolate latent sample, sample_score = gen_sample(tparams, f_next, model_options, maxlen=20, argmax=True, 
zmuv=z, unk_id=data.unk_id, eos_id=data.eos_id, bos_id=data.bos_id) data.print_batch(sample.T, eos_id=data.eos_id, print_number=False) data.print_batch(batch[0][[1]], eos_id=data.eos_id, print_number=False) raw_input("-- Next --") sys.exit(0)
        cost = T.mean((self.x - self.z) ** 2)
        gparams = T.grad(cost, self.params)
        updates = [(param, param - learning_rate * gparam)
                   for param, gparam in zip(self.params, gparams)]
        # updates = gradient_updates_momentum(cost, self.params)
        return (cost, updates)


print 'nTrials x nFeatures ', np.shape(X_eeg)
print 'Target vector ', np.shape(y_eeg)
print 'Total number of subjects: ', subject_count

"""
Generate symbolic variables for input (X and y represent a minibatch)
"""
X = T.matrix('X')  # 2100 x 60 data
y = T.vector('y')  # labels, presented as 1D vector of [int] labels

"""
Construct the denoising autoencoder
"""
rng = np.random.RandomState(1234)
n_hidden = 50
n_visible = np.shape(X_eeg)[1]
da = dA(numpy_rng=rng, input=X, n_visible=n_visible, n_hidden=n_hidden)
cost, updates = da.get_cost_updates(corruption_level=0.2, learning_rate=0.01)
train = theano.function(inputs=[X], outputs=cost, updates=updates,
                        allow_input_downcast=True)
predict = theano.function(inputs=[X], outputs=da.z)

"""
Leave One Out
"""
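# Hedged sketch of the leave-one-out loop announced by the comment above
# (an assumption about how `train` / `predict` might be driven; `subject_ids`
# is a hypothetical per-trial subject index aligned with the rows of X_eeg).
# A full evaluation would re-initialize the autoencoder for each fold.
for held_out in np.unique(subject_ids):
    train_mask = subject_ids != held_out
    for epoch in range(15):
        c = train(X_eeg[train_mask])
    recon = predict(X_eeg[~train_mask])
    print 'subject', held_out, 'reconstruction MSE', np.mean((recon - X_eeg[~train_mask]) ** 2)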
sys.path.insert(1, os.path.join(base_path, '../../common'))
sys.path.insert(2, os.path.join(base_path, '../../database'))
sys.path.insert(1, os.path.join(base_path, '../'))

from db import DB
from project import Project
from performance import Performance
from data import Data
from cnn import CNN

if __name__ == '__main__':
    # load the model to use for performance evaluation
    x = T.matrix('x')
    rng = numpy.random.RandomState(1234)

    # retrieve the project settings from the database
    project = DB.getProject('evalcnn')

    # create the model based on the project
    model = CNN(rng=rng,
                input=x,
                offline=True,
                batch_size=project.batchSize,
                patch_size=project.patchSize,
                nkerns=project.nKernels,
                kernel_sizes=project.kernelSizes,
                hidden_sizes=project.hiddenUnits,
def make_matrix():
    """
    Returns a new Theano matrix.
    """
    return T.matrix()
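# Hedged usage example (not in the original source): the fresh matrix behaves
# like any other Theano symbolic variable.
import theano
m = make_matrix()
f_double = theano.function([m], 2 * m)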
def train_conv_net(datasets, U, img_w=300, filter_hs=[3, 4, 5], hidden_units=[100, 2], dropout_rate=[0.5], shuffle_batch=True, n_epochs=25, batch_size=50, lr_decay=0.95, conv_non_linear="relu", activations=[Iden], sqr_norm_lim=9, non_static=True): """ Train a simple conv net img_h = sentence length (padded where necessary) img_w = word vector length (300 for word2vec) filter_hs = filter window sizes hidden_units = [x,y] x is the number of feature maps (per filter window), and y is the penultimate layer sqr_norm_lim = s^2 in the paper lr_decay = adadelta decay parameter """ rng = np.random.RandomState(3435) img_h = len(datasets[0][0]) - 1 filter_w = img_w feature_maps = hidden_units[0] filter_shapes = [] pool_sizes = [] for filter_h in filter_hs: filter_shapes.append((feature_maps, 1, filter_h, filter_w)) pool_sizes.append((img_h - filter_h + 1, img_w - filter_w + 1)) parameters = [("image shape", img_h, img_w), ("filter shape", filter_shapes), ("hidden_units", hidden_units), ("dropout", dropout_rate), ("batch_size", batch_size), ("non_static", non_static), ("learn_decay", lr_decay), ("conv_non_linear", conv_non_linear), ("non_static", non_static), ("sqr_norm_lim", sqr_norm_lim), ("shuffle_batch", shuffle_batch)] print parameters #define model architecture index = T.lscalar() x = T.matrix('x') y = T.ivector('y') Words = theano.shared(value=U, name="Words") zero_vec_tensor = T.vector() zero_vec = np.zeros(img_w) set_zero = theano.function([zero_vec_tensor], updates=[ (Words, T.set_subtensor(Words[0, :], zero_vec_tensor)) ], allow_input_downcast=True) layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape( (x.shape[0], 1, x.shape[1], Words.shape[1])) conv_layers = [] layer1_inputs = [] for i in xrange(len(filter_hs)): filter_shape = filter_shapes[i] pool_size = pool_sizes[i] conv_layer = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=(batch_size, 1, img_h, img_w), filter_shape=filter_shape, poolsize=pool_size, non_linear=conv_non_linear) layer1_input = conv_layer.output.flatten(2) conv_layers.append(conv_layer) layer1_inputs.append(layer1_input) layer1_input = T.concatenate(layer1_inputs, 1) hidden_units[0] = feature_maps * len(filter_hs) classifier = MLPDropout(rng, input=layer1_input, layer_sizes=hidden_units, activations=activations, dropout_rates=dropout_rate) #define parameters of the model and update functions using adadelta params = classifier.params for conv_layer in conv_layers: params += conv_layer.params if non_static: #if word vectors are allowed to change, add them as model parameters params += [Words] cost = classifier.negative_log_likelihood(y) dropout_cost = classifier.dropout_negative_log_likelihood(y) grad_updates = sgd_updates_adadelta(params, dropout_cost, lr_decay, 1e-6, sqr_norm_lim) #shuffle dataset and assign to mini batches. 
if dataset size is not a multiple of mini batches, replicate #extra data (at random) np.random.seed(3435) if datasets[0].shape[0] % batch_size > 0: extra_data_num = batch_size - datasets[0].shape[0] % batch_size train_set = np.random.permutation(datasets[0]) extra_data = train_set[:extra_data_num] new_data = np.append(datasets[0], extra_data, axis=0) else: new_data = datasets[0] new_data = np.random.permutation(new_data) n_batches = new_data.shape[0] / batch_size n_train_batches = int(np.round(n_batches * 0.9)) #divide train set into train/val sets test_set_x = datasets[1][:, :img_h] test_set_y = np.asarray(datasets[1][:, -1], "int32") train_set = new_data[:n_train_batches * batch_size, :] val_set = new_data[n_train_batches * batch_size:, :] train_set_x, train_set_y = shared_dataset( (train_set[:, :img_h], train_set[:, -1])) val_set_x, val_set_y = shared_dataset((val_set[:, :img_h], val_set[:, -1])) n_val_batches = n_batches - n_train_batches val_model = theano.function( [index], classifier.errors(y), givens={ x: val_set_x[index * batch_size:(index + 1) * batch_size], y: val_set_y[index * batch_size:(index + 1) * batch_size] }, allow_input_downcast=True) #compile theano functions to get train/val/test errors test_model = theano.function( [index], classifier.errors(y), givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size] }, allow_input_downcast=True) train_model = theano.function( [index], cost, updates=grad_updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size] }, allow_input_downcast=True) test_pred_layers = [] test_size = test_set_x.shape[0] test_layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape( (test_size, 1, img_h, Words.shape[1])) for conv_layer in conv_layers: test_layer0_output = conv_layer.predict(test_layer0_input, test_size) test_pred_layers.append(test_layer0_output.flatten(2)) test_layer1_input = T.concatenate(test_pred_layers, 1) test_y_pred = classifier.predict(test_layer1_input) test_error = T.mean(T.neq(test_y_pred, y)) test_model_all = theano.function([x, y], test_error, allow_input_downcast=True) #start training over mini-batches print '... training' epoch = 0 best_val_perf = 0 val_perf = 0 test_perf = 0 cost_epoch = 0 while (epoch < n_epochs): start_time = time.time() epoch = epoch + 1 if shuffle_batch: for minibatch_index in np.random.permutation( range(n_train_batches)): cost_epoch = train_model(minibatch_index) set_zero(zero_vec) else: for minibatch_index in xrange(n_train_batches): cost_epoch = train_model(minibatch_index) set_zero(zero_vec) train_losses = [test_model(i) for i in xrange(n_train_batches)] train_perf = 1 - np.mean(train_losses) val_losses = [val_model(i) for i in xrange(n_val_batches)] val_perf = 1 - np.mean(val_losses) print( 'epoch: %i, training time: %.2f secs, train perf: %.2f %%, val perf: %.2f %%' % (epoch, time.time() - start_time, train_perf * 100., val_perf * 100.)) if val_perf >= best_val_perf: best_val_perf = val_perf test_loss = test_model_all(test_set_x, test_set_y) test_perf = 1 - test_loss return test_perf