Example #1
 def test_infer_shape(self):
     admat = matrix()
     bdmat = matrix()
     admat_val = numpy.random.rand(3, 4).astype(config.floatX)
     bdmat_val = numpy.random.rand(3, 4).astype(config.floatX)
     self._compile_and_check([admat, bdmat], [SoftmaxGrad()(admat, bdmat)],
                         [admat_val, bdmat_val], SoftmaxGrad)
Example #2
def test_min_informative_str():
    """ evaluates a reference output to make sure the
        min_informative_str function works as intended """

    A = tensor.matrix(name='A')
    B = tensor.matrix(name='B')
    C = A + B
    C.name = 'C'
    D = tensor.matrix(name='D')
    E = tensor.matrix(name='E')

    F = D + E
    G = C + F

    mis = min_informative_str(G).replace("\t", "        ")

    reference = """A. Elemwise{add,no_inplace}
 B. C
 C. Elemwise{add,no_inplace}
  D. D
  E. E"""

    if mis != reference:
        print('--' + mis + '--')
        print('--' + reference + '--')

    assert mis == reference
Example #3
File: test_simple.py  Project: Wiebke/breze
def test_linear_regression():
    inpt = T.matrix('inpt')
    inpt.tag.test_value = np.zeros((3, 10))
    inpt.tag.test_value
    target = T.matrix('target')
    target.tag.test_value = np.zeros((3, 2))

    l = AffineNonlinear(inpt, 10, 2, 'tanh')

    loss = squared(target, l.output).sum(1).mean()

    m = SupervisedModel(inpt=inpt, target=target, output=l.output, loss=loss,
                        parameters=l.parameters)

    f_predict = m.function([m.inpt], m.output)
    f_loss = m.function([m.inpt, m.target], m.loss)

    X = np.zeros((20, 10))
    Z = np.zeros((20, 2))

    Y = f_predict(X)

    assert Y.shape == (20, 2), 'output has wrong shape'

    l = f_loss(X, Z)

    assert np.array(l).ndim == 0, 'loss is not a scalar'
Example #4
def test_sequence_variable_inputs():
    x, y = tensor.matrix(), tensor.matrix()

    parallel_1 = Parallel(input_names=['input_1', 'input_2'],
                          input_dims=dict(input_1=4, input_2=5),
                          output_dims=dict(input_1=3, input_2=2),
                          prototype=Linear(), weights_init=Constant(2),
                          biases_init=Constant(1))
    parallel_2 = Parallel(input_names=['input_1', 'input_2'],
                          input_dims=dict(input_1=3, input_2=2),
                          output_dims=dict(input_1=5, input_2=4),
                          prototype=Linear(), weights_init=Constant(2),
                          biases_init=Constant(1))
    sequence = Sequence([parallel_1.apply, parallel_2.apply])
    sequence.initialize()
    new_x, new_y = sequence.apply(x, y)
    x_val = numpy.ones((4, 4), dtype=theano.config.floatX)
    y_val = numpy.ones((4, 5), dtype=theano.config.floatX)
    assert_allclose(
        new_x.eval({x: x_val}),
        (x_val.dot(2 * numpy.ones((4, 3))) + numpy.ones((4, 3))).dot(
            2 * numpy.ones((3, 5))) + numpy.ones((4, 5)))
    assert_allclose(
        new_y.eval({y: y_val}),
        (y_val.dot(2 * numpy.ones((5, 2))) + numpy.ones((4, 2))).dot(
            2 * numpy.ones((2, 4))) + numpy.ones((4, 4)))
Example #5
def Z_LSTM(input_var, z_dim=256, nhid=512, layers=2, gradclip=10, training=True):
    ret = {}
    state_vars = []
    ret['input'] = layer = nn.layers.InputLayer(input_var=input_var, shape=(None, None, z_dim))
    batchsize, seqlen, _ = layer.input_var.shape
    for lay in xrange(layers):
        ret['drop_{}'.format(lay)] = layer = nn.layers.DropoutLayer(layer, p=0.3)
        if training:
            ret['lstm_{}'.format(lay)] = layer = LSTMSampleableLayer(layer, nhid,
                grad_clipping=gradclip, learn_init=True)
        else:
            cell_var = T.matrix('cell_var_{}'.format(lay))
            hid_var = T.matrix('hid_var_{}'.format(lay))
            state_vars.append(cell_var)
            state_vars.append(hid_var)
            ret['lstm_{}'.format(lay)] = layer = LSTMSampleableLayer(layer, nhid,
                cell_init=cell_var, hid_init=hid_var)
        ret['cell_{}'.format(lay)] = nn.layers.SliceLayer(layer, axis=2,
                indices=slice(None,nhid))
        ret['hid_{}'.format(lay)] = layer = nn.layers.SliceLayer(layer, axis=2,
                indices=slice(nhid,None))
    ret['reshape'] = layer = nn.layers.ReshapeLayer(layer, (-1, nhid))
    ret['project'] = layer = nn.layers.DenseLayer(layer, num_units=z_dim, nonlinearity=None)
    ret['output'] = layer = nn.layers.ReshapeLayer(layer, (batchsize, seqlen, z_dim))
    # final state slice layers for passing to next instance of lstm
    for lay in xrange(layers):
        ret['cellfinal_{}'.format(lay)] = nn.layers.SliceLayer(ret['cell_{}'.format(lay)],
                axis=1, indices=-1)
        ret['hidfinal_{}'.format(lay)] = nn.layers.SliceLayer(ret['hid_{}'.format(lay)], 
                axis=1, indices=-1)
    return ret, state_vars
Example #6
 def initialise_model(self, X_train, y_train):
     print 'Initialising model...'
     self.input_shape = X_train.shape[1]
     input_var = T.matrix('inputs')
     target_var = T.matrix('targets')
     
     if self.normalise:
         y_train = self.normalise_y(y_train, reset = True)
         X_train = self.normalise_X(X_train, reset = True)
 
     # Create neural network model
     self.network = self.build_custom_mlp(input_var)
     prediction = lasagne.layers.get_output(self.network)
     loss = lasagne.objectives.squared_error(prediction, target_var)
     loss = loss.mean()
     params = lasagne.layers.get_all_params(self.network, trainable=True)
     updates = lasagne.updates.nesterov_momentum(loss, params, 
                                         learning_rate=self.learning_rate, 
                                         momentum=self.momentum)
     test_prediction = lasagne.layers.get_output(self.network,
                                                 deterministic=True)
     test_loss = lasagne.objectives.squared_error(test_prediction,
                                                  target_var)
     test_loss = test_loss.mean()
     self.train_fn = theano.function([input_var, target_var], loss, 
                                updates=updates, allow_input_downcast=True)
     self.predict_output = theano.function([input_var],
                                           outputs=test_prediction,
                                           allow_input_downcast=True)
     self.initialised = True
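Example #7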
	def __init__(self, embedding_dim=100, num_hidden_layers=2, hidden_dim=200, in_dropout_p=0.2, hidden_dropout_p=0.5, update_hyperparams={'learning_rate': 0.01}):
		self.embedding_dim = embedding_dim
		self.num_hidden_layers = num_hidden_layers
		self.hidden_dim = hidden_dim
		self.in_dropout_p = in_dropout_p
		self.hidden_dropout_p = hidden_dropout_p
	
		print >> sys.stderr, 'Building computation graph for discriminator...'		
		self.input_var = T.matrix('input')
		self.target_var = T.matrix('target')

		self.l_in = lasagne.layers.InputLayer(shape=(None, self.embedding_dim), input_var=T.tanh(self.input_var), name='l_in')
		self.l_in_dr = lasagne.layers.DropoutLayer(self.l_in, 0.2)
		self.layers = [self.l_in, self.l_in_dr]
		for i in xrange(self.num_hidden_layers):
			l_hid = lasagne.layers.batch_norm(lasagne.layers.DenseLayer(self.layers[-1], num_units=self.hidden_dim, nonlinearity=lasagne.nonlinearities.leaky_rectify, W=lasagne.init.GlorotUniform(gain=leaky_relu_gain), name=('l_hid_%s' % i)))
			l_hid_dr = lasagne.layers.DropoutLayer(l_hid, 0.5)
			self.layers.append(l_hid)
			self.layers.append(l_hid_dr)
		self.l_preout = lasagne.layers.batch_norm(lasagne.layers.DenseLayer(self.layers[-1], num_units=1, nonlinearity=None, name='l_preout'))
		self.l_out = lasagne.layers.NonlinearityLayer(self.l_preout, nonlinearity=lasagne.nonlinearities.sigmoid, name='l_out')

		self.prediction = lasagne.layers.get_output(self.l_out)
		self.loss = lasagne.objectives.binary_crossentropy(self.prediction, self.target_var).mean()
		self.accuracy = T.eq(T.ge(self.prediction, 0.5), self.target_var).mean()

		self.params = lasagne.layers.get_all_params(self.l_out, trainable=True)
		self.updates = lasagne.updates.adam(self.loss, self.params, **update_hyperparams)

		print >> sys.stderr, 'Compiling discriminator...'
		self.train_fn = theano.function([self.input_var, self.target_var], [self.loss, self.accuracy], updates=self.updates)
		self.eval_fn = theano.function([self.input_var, self.target_var], [self.loss, self.accuracy])
Example #8
File: objectives.py  Project: igul222/P-op
    def __init__(self, input_layer, loss_function=mse, aggregation='mean'):
        """
        Constructor

        :parameters:
            - input_layer : a `Layer` whose output is the network's prediction
                given its input
            - loss_function : a loss function of the form `f(x, t, m)` that
                returns a scalar loss given tensors that represent the
                predicted values, true values and mask as arguments.
            - aggregation : either:
                - `None` or `'mean'` : the elements of the loss will be
                multiplied by the mask and the mean returned
                - `'sum'` : the elements of the loss will be multiplied by
                the mask and the sum returned
                - `'normalized_sum'` : the elements of the loss will be
                multiplied by the mask, summed and divided by the sum of
                the mask
        """
        self.input_layer = input_layer
        self.loss_function = loss_function
        self.target_var = T.matrix("target")
        self.mask_var = T.matrix("mask")
        if aggregation not in self._valid_aggregation:
            raise ValueError('aggregation must be \'mean\', \'sum\', '
                             '\'normalized_sum\' or None,'
                             ' not {0}'.format(aggregation))
        self.aggregation = aggregation
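A minimal usage sketch of the masked-loss behaviour described in the docstring above, written with plain Theano ops only (this is illustrative and not part of the original objectives.py; the variable names are assumptions):

import theano
import theano.tensor as T

x = T.matrix('prediction')   # predicted values
t = T.matrix('target')       # true values
m = T.matrix('mask')

# per-element loss in the f(x, t, m) style, here a squared error
per_element_loss = (x - t) ** 2

# 'normalized_sum' aggregation: multiply by the mask, sum, divide by the mask total
masked_loss = (per_element_loss * m).sum() / m.sum()

f_loss = theano.function([x, t, m], masked_loss)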
Example #9
 def init_variables(self):
     self.input_var = T.matrix('inputs')
     self.side_var = T.matrix('contexts')
     # do regression
     #self.target_var = T.ivector('targets')
     self.target_var = T.vector('targets')
     self.num_classes = 1 # regression -> dim matters, not classes
Example #10
    def __init__(self, dnodex,dim):
        X = T.matrix()
        Y = T.matrix()

        eta = T.scalar()
        temperature=T.scalar()

        num_input = len(format(dnodex.npoi,'b'))
        num_hidden = dim
        num_output = len(format(dnodex.npoi,'b'))

        inputs = InputLayer(X, name="inputs")
        lstm1 = LSTMLayer(num_input, num_hidden, input_layer=inputs, name="lstm1")
        lstm2 = LSTMLayer(num_hidden, num_hidden, input_layer=lstm1, name="lstm2")
        #lstm3 = LSTMLayer(num_hidden, num_hidden, input_layer=lstm2, name="lstm3")
        softmax = SoftmaxLayer(num_hidden, num_output, input_layer=lstm2, name="yhat", temperature=temperature)

        Y_hat = softmax.output()

        self.layers = inputs, lstm1, lstm2, softmax

        params = get_params(self.layers)
        caches = make_caches(params)

        cost = T.mean(T.nnet.categorical_crossentropy(Y_hat, Y))
        updates = momentum(cost, params, caches, eta)

        self.train = theano.function([X, Y, eta, temperature], cost, updates=updates, allow_input_downcast=True)

        predict_updates = one_step_updates(self.layers)
        self.predict_char = theano.function([X, temperature], Y_hat, updates=predict_updates, allow_input_downcast=True)
Example #11
File: nn.py  Project: ubuntu733/DeepNet
def fine_train(nn,datasets,learning_Rate,batch_sizes,epochs):
	train_set_x, train_set_y = datasets[0]
	n_batches = train_set_x.get_value(borrow=True).shape[0] / batch_sizes
	
	train_set_y = T.cast(train_set_y, 'float64')
	index = T.lscalar()
	x = T.matrix('x')
	y = T.matrix('y')
	min_batch_cost = []
	if nn is None:
		mynn = ForwordNN(x,y,n_in,n_out,hidden_sizes)
	else:
		mynn=nn
	cost,update = mynn.get_cost_update(x,y,learning_Rate)
	train_nn = theano.function([index],
				cost,
				updates = update,
				givens = {
							x:train_set_x[index*batch_sizes:(index+1)*batch_sizes,:],
							y:train_set_y[index*batch_sizes:(index+1)*batch_sizes,:]
						}
				)
	for num_epochs in range(epochs):
		t1=time.time()
		for num_batch in xrange(n_batches):
			min_batch_cost.append(train_nn(num_batch))
		t2=time.time()
		print 'The %d/%dth training,takes %f seconds,cost is %f' %(num_epochs+1,epochs,(t2-t1),np.mean(min_batch_cost))
	return mynn	
Example #12
 def __init__(self, model, cost, monitoring_dataset, batch_size):
     """
     Parameters
     ----------
     model : pylearn2.models.model.Model
         the model whose best parameters we want to keep track of
      cost : pylearn2.costs.cost.Cost
         cost function used to evaluate the model's performance
     monitoring_dataset : pylearn2.datasets.dataset.Dataset
         dataset on which to compute the cost
     batch_size : int
         size of the batches used to compute the cost
     """
     self.model = model
     self.cost = cost
     self.dataset = monitoring_dataset
     self.batch_size = batch_size
     self.minibatch = T.matrix('minibatch')
     self.target = T.matrix('target')
     if cost.supervised:
         self.supervised = True
         self.cost_function = theano.function(inputs=[self.minibatch,
                                                       self.target],
                                               outputs=cost(model,
                                                            self.minibatch,
                                                            self.target))
     else:
         self.supervised = False
         self.cost_function = theano.function(inputs=[self.minibatch],
                                              outputs=cost(model,
                                                           self.minibatch))
     self.best_cost = numpy.inf
     self.best_params = model.get_param_values()
Example #13
def set_generation_function(recurrent_model, output_model):
    # set input data (1*num_samples*features)
    input_data  = tensor.matrix(name='input_seq', dtype=floatX)
    # set init hidden/cell(num_samples*hidden_size)
    prev_hidden_data = tensor.matrix(name='prev_hidden_data', dtype=floatX)
    prev_cell_data   = tensor.matrix(name='prev_cell_data', dtype=floatX)

    # get hidden data
    recurrent_data = get_tensor_output(input=[input_data, prev_hidden_data, prev_cell_data], layers=recurrent_model, is_training=False)
    cur_hidden_data = recurrent_data[0]
    cur_cell_data   = recurrent_data[1]

    # get prediction data
    output_data = get_tensor_output(input=cur_hidden_data, layers=output_model, is_training=False)

    # input data
    generation_function_inputs  = [input_data,
                                   prev_hidden_data,
                                   prev_cell_data]
    generation_function_outputs = [cur_hidden_data,
                                   cur_cell_data,
                                   output_data]

    generation_function = theano.function(inputs=generation_function_inputs,
                                          outputs=generation_function_outputs,
                                          on_unused_input='ignore')
    return generation_function
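Example #14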
    def test_compute_flag(self):
        orig_compute_test_value = theano.config.compute_test_value
        try:
            x = T.matrix('x')
            y = T.matrix('y')
            y.tag.test_value = numpy.random.rand(4,5).astype(config.floatX)

            # should skip computation of test value
            theano.config.compute_test_value = 'off'
            z = T.dot(x,y)
            assert not hasattr(z.tag, 'test_value')

            # should fail when asked by user
            theano.config.compute_test_value = 'raise'
            self.assertRaises(ValueError, T.dot, x, y)

            # test that a warning is raised if required
            theano.config.compute_test_value = 'warn'
            warnings.simplefilter('error', UserWarning)
            try:
                self.assertRaises(UserWarning, T.dot, x, y)
            finally:
                # Restore the default behavior.
                # TODO There is a cleaner way to do this in Python 2.6, once
                # Theano drops support of Python 2.4 and 2.5.
                warnings.simplefilter('default', UserWarning)
        finally:
            theano.config.compute_test_value = orig_compute_test_value
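Example #15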
    def test_string_var(self):
        orig_compute_test_value = theano.config.compute_test_value
        try:
            theano.config.compute_test_value = 'raise'

            x = T.matrix('x')
            x.tag.test_value = numpy.random.rand(3,4).astype(config.floatX)
            y = T.matrix('y')
            y.tag.test_value = numpy.random.rand(4,5).astype(config.floatX)

            z = theano.shared(numpy.random.rand(5,6).astype(config.floatX))

            # should work
            out = T.dot(T.dot(x,y), z)
            assert hasattr(out.tag, 'test_value')
            tf = theano.function([x,y], out)
            assert _allclose(
                    tf(x.tag.test_value, y.tag.test_value),
                    out.tag.test_value)

            def f(x,y,z):
                return T.dot(T.dot(x,y),z)

            # this test should fail
            z.set_value(numpy.random.rand(7,6).astype(config.floatX))
            self.assertRaises(ValueError, f, x, y, z)
        finally:
            theano.config.compute_test_value = orig_compute_test_value
Example #16
 def _construct_compute_fe_terms(self):
     """
     Construct a function for computing terms in variational free energy.
     """
     # setup some symbolic variables for theano to deal with
     xi = T.matrix()
     xo = T.matrix()
     _, hi_zmuv = self._construct_zmuv_samples(xi, 1)
     # construct values to output
     nll = self.nlli[-1]
     kld = self.kld_z.flatten() + self.kld_hi_q2p.flatten()
     # compile theano function for a one-sample free-energy estimate
     fe_term_sample = theano.function(inputs=[ xi, xo ], \
             outputs=[nll, kld], \
             givens={self.x_in: xi, \
                     self.x_out: xo, \
                     self.hi_zmuv: hi_zmuv}, \
             updates=self.scan_updates)
     # construct a wrapper function for multi-sample free-energy estimate
     def fe_term_estimator(XI, XO, sample_count):
         # compute a multi-sample estimate of variational free-energy
         nll_sum = np.zeros((XI.shape[0],))
         kld_sum = np.zeros((XI.shape[0],))
         for i in range(sample_count):
             result = fe_term_sample(XI, XO)
             nll_sum += result[0].ravel()
             kld_sum += result[1].ravel()
         mean_nll = nll_sum / float(sample_count)
         mean_kld = kld_sum / float(sample_count)
         return [mean_nll, mean_kld]
     return fe_term_estimator
Example #17
 def _construct_sample_from_prior(self):
     """
     Construct a function for drawing independent samples from the
     distribution generated by this MultiStageModel. This function returns
     the full sequence of "partially completed" examples.
     """
     z_sym = T.matrix()
     x_sym = T.matrix()
     irs = self.ir_steps
     oputs = [self.obs_transform(self.s0)]
     oputs.extend([self.obs_transform(self.si[i]) for i in range(irs)])
     _, hi_zmuv = self._construct_zmuv_samples(x_sym, 1)
     sample_func = theano.function(inputs=[z_sym, x_sym], outputs=oputs, \
             givens={ self.z: z_sym, \
                      self.x_in: T.zeros_like(x_sym), \
                      self.x_out: T.zeros_like(x_sym), \
                      self.hi_zmuv: hi_zmuv }, \
             updates=self.scan_updates)
     def prior_sampler(samp_count):
         x_samps = to_fX( np.zeros((samp_count, self.obs_dim)) )
         old_switch = self.train_switch.get_value(borrow=False)
         # set model to generation mode
         self.set_train_switch(switch_val=0.0)
         z_samps = to_fX( npr.randn(samp_count, self.z_dim) )
         model_samps = sample_func(z_samps, x_samps)
         # set model back to either training or generation mode
         self.set_train_switch(switch_val=old_switch)
         return model_samps
     return prior_sampler
Example #18
    def test_wrong_dims(self):
        a = tt.matrix()
        increment = tt.matrix()
        index = 0

        self.assertRaises(TypeError, tt.set_subtensor, a[index], increment)
        self.assertRaises(TypeError, tt.inc_subtensor, a[index], increment)
Example #19
 def __init__(self,rng=None,theano_rng=None,n_in=121,hidden_layers_sizes=[400,400,400],n_hidden=6,n_out=1):
     self.dA_layers = []
     self.sigmoid_layers = []
     self.params = []
     self.n_layers = len(hidden_layers_sizes)
     assert self.n_layers > 0
     if not theano_rng:
         theano_rng = RandomStreams(rng.randint(2**30))
     self.x = T.matrix('x')
     self.y = T.matrix('y')
     for i in xrange(self.n_layers):
         if i == 0:
             input_size = n_in
             layer_input = self.x
         else:
             input_size = hidden_layers_sizes[i-1]
             layer_input = self.sigmoid_layers[-1].output
         sigmoid_layer = regressionLayer(rng=rng,input=layer_input,n_in=input_size,n_out=hidden_layers_sizes[i],activation=T.tanh)
         self.sigmoid_layers.append(sigmoid_layer)
         self.params.extend(sigmoid_layer.params)
         dA_layer = daLayer(rng=rng,theano_rng=theano_rng,input=layer_input,n_in=input_size,n_hidden=hidden_layers_sizes[i],W=sigmoid_layer.W,bhid=sigmoid_layer.b,activation=T.tanh)
         self.dA_layers.append(dA_layer)
     self.reg_layer1 = regressionLayer(rng=rng,input=self.sigmoid_layers[-1].output,n_in=hidden_layers_sizes[-1],n_out=n_hidden)
     self.reg_layer2 = regressionLayer(rng=rng,input=self.reg_layer1.output,n_in=n_hidden,n_out=n_out)
     self.params.extend(self.reg_layer1.params)
     self.params.extend(self.reg_layer2.params)
     self.output = self.reg_layer2.output
     self.errors = T.mean((self.output-self.y)**2)
Example #20
def build_model(tparams, options):
    trng = RandomStreams(SEED)

    # Used for dropout.
    use_noise = theano.shared(numpy_floatX(0.))

    x = tensor.matrix('x', dtype='int64')
    mask = tensor.matrix('mask', dtype=config.floatX)
    y = tensor.vector('y', dtype='int64')

    n_timesteps = x.shape[0]
    n_samples = x.shape[1]

    emb = tparams['Wemb'][x.flatten()].reshape([n_timesteps,
                                                n_samples,
                                                options['dim_proj']])
    proj = get_layer(options['encoder'])[1](tparams, emb, options,
                                            prefix=options['encoder'],
                                            mask=mask)
    if options['encoder'] == 'lstm':
        proj = (proj * mask[:, :, None]).sum(axis=0)
        proj = proj / mask.sum(axis=0)[:, None]
    if options['use_dropout']:
        proj = dropout_layer(proj, use_noise, trng)

    pred = tensor.nnet.softmax(tensor.dot(proj, tparams['U']) + tparams['b'])

    f_pred_prob = theano.function([x, mask], pred, name='f_pred_prob')
    f_pred = theano.function([x, mask], pred.argmax(axis=1), name='f_pred')

    cost = -tensor.log(pred[tensor.arange(n_samples), y] + 1e-8).mean()

    return use_noise, x, mask, y, f_pred_prob, f_pred, cost
Example #21
def rebuild_nn(nn_params):
    W_e, W_p, W_o, b_o = read_obj(nn_params, 4)
    mlp = MLPNoHid(W_e.get_value(), W_p.get_value(), W_o.get_value(), b_o.get_value())
    wx = T.matrix('word', dtype='int32')
    px = T.matrix('POS', dtype='int32')
    f_pred = theano.function([wx, px], mlp.output(wx, px))
    return f_pred
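Example #22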
def test_pdf_compare_logpdf():
    theano.config.compute_test_value = 'raise'
    sample = T.matrix()
    sample.tag.test_value = np.random.random((10, 5)).astype(theano.config.floatX)

    mean = T.vector()
    mean.tag.test_value = np.empty(5).astype(theano.config.floatX)

    cov = T.matrix()
    cov.tag.test_value = np.random.random((5, 5)).astype(theano.config.floatX)


    density = mvn.pdf(sample, mean, cov)
    log_density = mvn.logpdf(sample, mean, cov)

    f_density = theano.function([sample, mean, cov], density)
    f_logdensity = theano.function([sample, mean, cov], log_density)

    some_sample = np.random.random((20, 5)).astype(theano.config.floatX)
    some_mean = np.array([1., 2., 3., 4., 5.]).astype(theano.config.floatX)
    w = np.random.random((5, 5)).astype(theano.config.floatX)

    some_cov = np.dot(w, w.T) + np.eye(5).astype(theano.config.floatX)


    d = f_density(some_sample, some_mean, some_cov)
    log_d = f_logdensity(some_sample, some_mean, some_cov)

    assert np.allclose(np.log(d), log_d)
Example #23
    def build_model(self):
        ######################
        # BUILD ACTUAL MODEL #
        ######################
        logger.info('... building the model')

        U, W, V, bh, by = self.U, self.W, self.V, self.bh, self.by
        x = T.matrix('x')
        y = T.matrix('y')

        def forward_prop_step(x_t, s_tm1, U, W, bh):
            s_t = self.activation(T.dot(U, x_t) + T.dot(W, s_tm1) + bh)
            return s_t

        s, _ = theano.scan(
            forward_prop_step,
            sequences=x,
            outputs_info=[dict(initial=T.zeros(self.hidden_dim))],
            non_sequences=[U, W, bh],
            mode='DebugMode')

        p_y = T.nnet.softmax(T.dot(self.V, s[-1]) + by)
        prediction = T.argmax(p_y, axis=1)
        o_error = T.sum(T.nnet.categorical_crossentropy(p_y, y))
        self.cost = o_error + self.L1_reg * self.L1 + self.L2_reg * self.L2_sqr

        # Assign functions
        self.forward_propagation = theano.function([x], s[-1])
        self.predict = theano.function([x], prediction)
        self.ce_error = theano.function([x, y], o_error)

        l_r = T.scalar('l_r', dtype=theano.config.floatX)   # learning rate (may change)
        mom = T.scalar('mom', dtype=theano.config.floatX)   # momentum
        self.bptt, self.f_update = self.Momentum(x, y, l_r, mom)
Example #24
File: test_opt.py  Project: aalmah/Theano
    def est_both_assert_merge_2_reverse(self):
        # Test case "test_both_assert_merge_2" but in reverse order
        x1 = T.matrix('x1')
        x2 = T.matrix('x2')
        x3 = T.matrix('x3')
        e = T.dot(x1, T.opt.assert_op(x2, (x2 > x3).all())) +\
            T.dot(T.opt.assert_op(x1, (x1 > x3).all()), x2)
        g = FunctionGraph([x1, x2, x3], [e])
        MergeOptimizer().optimize(g)
        strg = theano.printing.debugprint(g, file='str')
        strref = '''Elemwise{add,no_inplace} [@A] ''   7
 |dot [@B] ''   6
 | |Assert{msg='Theano Assert failed!'} [@C] ''   5
 | | |x1 [@D]
 | | |All [@E] ''   3
 | |   |Elemwise{gt,no_inplace} [@F] ''   1
 | |     |x1 [@D]
 | |     |x3 [@G]
 | |Assert{msg='Theano Assert failed!'} [@H] ''   4
 |   |x2 [@I]
 |   |All [@J] ''   2
 |     |Elemwise{gt,no_inplace} [@K] ''   0
 |       |x2 [@I]
 |       |x3 [@G]
 |dot [@B] ''   6
'''
        print(strg)
        assert strg == strref, (strg, strref)
Example #25
def get_model(Ws, bs, dropout=False):
    v = T.matrix('input')
    m = T.matrix('missing')
    q = T.matrix('target')
    k = T.vector('normalization factor')

    # Set all missing/target values to 0.5
    keep_mask = (1-m) * (1-q)
    h = keep_mask * (v * 2 - 1) # Convert to +1, -1
    
    # Normalize layer 0
    h *= k.dimshuffle(0, 'x')

    for l in xrange(len(Ws)):
        h = T.dot(h, Ws[l]) + bs[l]

        if l < len(Ws) - 1:
            h = h * (h > 0) # relu
            if dropout:
                mask = srng.binomial(n=1, p=0.5, size=h.shape)
                h = h * mask * 2

    output = sigmoid(h)
    LL = v * T.log(output) + (1 - v) * T.log(1 - output)
    # loss = -(q * LL).sum() / q.sum()
    loss = -((1 - m) * LL).sum() / (1 - m).sum()

    return v, m, q, k, output, loss
Example #26
File: test_blas.py  Project: caglar/Theano
def test_hgemm_swap():
    from theano.sandbox.cuda import nvcc_compiler
    if nvcc_compiler.nvcc_version < '7.5':
        raise SkipTest("SgemmEx is only avaialble on cuda 7.5+")

    v = tensor.vector(dtype='float16')
    m = tensor.matrix(dtype='float16')
    m2 = tensor.matrix(dtype='float16')
    m32 = tensor.matrix(dtype='float32')

    # test that we don't try to replace anything but matrix x matrix in float16
    f = theano.function([v, m], tensor.dot(v, m), mode=mode_with_gpu)
    assert len([node for node in f.maker.fgraph.apply_nodes
                if isinstance(node.op, GpuGemm)]) == 0

    f = theano.function([m32, m], tensor.dot(m32, m), mode=mode_with_gpu)
    assert len([node for node in f.maker.fgraph.apply_nodes
                if isinstance(node.op, GpuGemm)]) == 0

    f = theano.function([m, m2], tensor.dot(m, m2), mode=mode_with_gpu)
    assert len([node for node in f.maker.fgraph.apply_nodes
                if isinstance(node.op, GpuGemm)]) == 1

    v1 = numpy.random.random((3, 4)).astype('float16')
    v2 = numpy.random.random((4, 2)).astype('float16')

    of = f(v1, v2)
    on = numpy.dot(v1, v2)

    utt.assert_allclose(of, on)
Example #27
    def _setup_vars(self, sparse_input):
        '''Setup Theano variables for our network.

        Parameters
        ----------
        sparse_input : bool
            Not used -- sparse inputs are not supported for recurrent networks.

        Returns
        -------
        vars : list of theano variables
            A list of the variables that this network requires as inputs.
        '''
        _warn_dimshuffle()

        assert not sparse_input, 'Theanets does not support sparse recurrent models!'

        self.src = TT.ftensor3('src')
        #self.src_mask = TT.imatrix('src_mask')
        self.src_mask = TT.matrix('src_mask')
        self.dst = TT.ftensor3('dst')
        self.labels = TT.imatrix('labels')
        self.weights = TT.matrix('weights')

        if self.weighted:
            return [self.src, self.src_mask, self.dst, self.labels, self.weights]
        return [self.src, self.dst]
Example #28
def funcs(dataset, network, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, momentum=MOMENTUM, alpha=L2_CONSTANT):

    """
        Method that returns the theano functions that are used in
        training and testing. These are the train and predict functions.
        The predict function returns the output of the network.
    """

    # symbolic variables
    X_batch = T.matrix()
    y_batch = T.matrix()

    # this is the cost of the network when fed through the noisy network
    l2 = lasagne.regularization.l2(X_batch)
    train_output = lasagne.layers.get_output(network, X_batch)
    cost = lasagne.objectives.mse(train_output, y_batch)
    cost = cost.mean() #+ alpha*l2

    # test the performance of the network without noise
    test = lasagne.layers.get_output(network, X_batch, deterministic=True)
    pred = T.argmax(test, axis=1)
    accuracy = T.mean(T.eq(pred, y_batch), dtype=theano.config.floatX)

    all_params = lasagne.layers.get_all_params(network)
    updates = lasagne.updates.nesterov_momentum(cost, all_params, learning_rate, momentum)

    train = theano.function(inputs=[X_batch, y_batch], outputs=cost, updates=updates, allow_input_downcast=True)
    valid = theano.function(inputs=[X_batch, y_batch], outputs=cost, allow_input_downcast=True)
    predict = theano.function(inputs=[X_batch], outputs=pred, allow_input_downcast=True)

    return dict(
        train=train,
        valid=valid,
        predict=predict
    )
Example #29
 def __init__(self, n_in, n_out, n_h, learning_rate=0.12):
    self.x = T.matrix(dtype=theano.config.floatX)  # @UndefinedVariable
    self.target = T.matrix(dtype=theano.config.floatX)  # @UndefinedVariable
    bound_x = numpy.sqrt(6. / (n_in + n_h))
    bound_h = numpy.sqrt(6. / (n_h + n_h))
    self.params = []
    self.w_x = theano.shared(np.array(np.random.uniform(low=-bound_x, high=bound_x, size=(n_in, n_h)), dtype=theano.config.floatX))  # @UndefinedVariable
    self.params.append(self.w_x)
    self.w_h = theano.shared(np.array(np.random.uniform(low=-bound_h, high=bound_h, size=(n_h, n_h)), dtype=theano.config.floatX))  # @UndefinedVariable
    self.params.append(self.w_h)
    self.b_h = theano.shared(np.array(np.random.uniform(low=-bound_h, high=bound_h, size=(n_h,)), dtype=theano.config.floatX))  # @UndefinedVariable
    self.params.append(self.b_h)
    self.w = theano.shared(np.array(np.random.uniform(low=-bound_h, high=bound_h, size=(n_h, n_out)), dtype=theano.config.floatX))  # @UndefinedVariable
    self.params.append(self.w)
    self.b = theano.shared(np.array(np.random.uniform(low=-bound_h, high=bound_h, size=(n_out,)), dtype=theano.config.floatX))  # @UndefinedVariable
    self.params.append(self.b)
    self.h0 = theano.shared(np.array(np.random.uniform(low=-bound_x, high=bound_x, size=(n_h,)), dtype=theano.config.floatX))  # @UndefinedVariable
    self.params.append(self.h0)
    
    def one_step(x, h1):
        h = T.nnet.sigmoid(T.dot(x, self.w_x) + T.dot(h1, self.w_h) + self.b_h)
        y = T.nnet.sigmoid(T.dot(h, self.w) + self.b)
        return h, y
    
    [hs, ys], _ = theano.scan(fn=one_step, sequences=self.x, outputs_info=[self.h0, None])
    cost = -T.mean(self.target * T.log(ys) + (1 - self.target) * T.log(1 - ys))
    grads = T.grad(cost, self.params)
    
    updates = [(param, param - learning_rate * grad) for param, grad in zip(self.params, grads)]
    
    self.train = theano.function([self.x, self.target], cost, updates=updates)
    
    self.predict = theano.function([self.x], ys)
Example #30
    def test_one_step(self):
        h0 = tensor.matrix('h0')
        c0 = tensor.matrix('c0')
        x = tensor.matrix('x')
        h1, c1 = self.lstm.apply(x, h0, c0, iterate=False)
        next_h = theano.function(inputs=[x, h0, c0], outputs=[h1])

        h0_val = 0.1 * numpy.array([[1, 1, 0], [0, 1, 1]],
                                   dtype=theano.config.floatX)
        c0_val = 0.1 * numpy.array([[1, 1, 0], [0, 1, 1]],
                                   dtype=theano.config.floatX)
        x_val = 0.1 * numpy.array([range(12), range(12, 24)],
                                  dtype=theano.config.floatX)
        W_state_val = 2 * numpy.ones((3, 12), dtype=theano.config.floatX)
        W_cell_to_in = 2 * numpy.ones((3,), dtype=theano.config.floatX)
        W_cell_to_out = 2 * numpy.ones((3,), dtype=theano.config.floatX)
        W_cell_to_forget = 2 * numpy.ones((3,), dtype=theano.config.floatX)

        # omitting biases because they are zero
        activation = numpy.dot(h0_val, W_state_val) + x_val

        def sigmoid(x):
            return 1. / (1. + numpy.exp(-x))

        i_t = sigmoid(activation[:, :3] + c0_val * W_cell_to_in)
        f_t = sigmoid(activation[:, 3:6] + c0_val * W_cell_to_forget)
        next_cells = f_t * c0_val + i_t * numpy.tanh(activation[:, 6:9])
        o_t = sigmoid(activation[:, 9:12] +
                      next_cells * W_cell_to_out)
        h1_val = o_t * numpy.tanh(next_cells)
        assert_allclose(h1_val, next_h(x_val, h0_val, c0_val)[0],
                        rtol=1e-6)
Example #31
reg_cost = lib.ops.kl_unit_gaussian.kl_unit_gaussian(
    mu,
    log_sigma
).sum(axis=1)

alpha = T.minimum(
    1,
    T.cast(total_iters, theano.config.floatX) / lib.floatX(ALPHA_ITERS)
)

if VANILLA:
    cost = reconst_cost
else:
    cost = reconst_cost + (alpha * reg_cost)

sample_fn_latents = T.matrix('sample_fn_latents')
sample_fn = theano.function(
    [sample_fn_latents, images],
    T.nnet.sigmoid(decode_algo(sample_fn_latents, images)),
    on_unused_input='warn'
)

eval_fn = theano.function(
    [images, total_iters],
    cost.mean()
)

train_data, dev_data, test_data = lib.mnist_binarized.load(
    BATCH_SIZE,
    TEST_BATCH_SIZE
)
Example #32

def shuffle_data(samples, labels):
    idx = np.arange(samples.shape[0])
    np.random.shuffle(idx)
    #print  (samples.shape, labels.shape)
    samples, labels = samples[idx], labels[idx]
    return samples, labels


decay = 1e-6
learning_rate = 0.01
epochs = 1000

# theano expressions
X = T.matrix()  #features
Y = T.matrix()  #output

w1, b1 = create_weights(36, 10), create_bias(
    10)  #weights and biases from input to hidden layer
w2, b2 = create_weights(10, 6, logistic=False), create_bias(
    6)  #weights and biases from hidden to output layer

h1 = T.nnet.sigmoid(T.dot(X, w1) + b1)
py = T.nnet.softmax(T.dot(h1, w2) + b2)

y_x = T.argmax(py, axis=1)

cost = T.mean(T.nnet.categorical_crossentropy(
    py, Y)) + decay * (T.sum(T.sqr(w1)) + T.sum(T.sqr(w2)))
params = [w1, b1, w2, b2]
Example #33
def trainword(keyword, window_radius = 3, learning_rate = 0.1, n_epochs = 10,batch_size = 1,filter_height=3,filter_width = 50, pool_height=1,pool_width = 1, loginput_num = 50, vector_size = 50):

    print '==training parameters=='
    print 'window_radius: '+str(window_radius)
    print 'vector_size: '+str(vector_size)
    print 'filter_height: '+str(filter_height)
    print 'filter_width: '+str(filter_width)
    print 'pool_height: '+str(pool_height)
    print 'pool_width: '+str(pool_width)
    print 'loginput_num: '+str(loginput_num)
    print 'learning_rate: '+str(learning_rate)
    print 'n_epochs: '+str(n_epochs)
    print 'batch_size: '+str(batch_size)

    rng = numpy.random.RandomState(23455)
    datasets = load_data_word(keyword, window_radius, vector_size)

    train_set_x, train_set_y, trainsentence = datasets[0][0]
    valid_set_x, valid_set_y, validsentence = datasets[0][1]
    test_set_x, test_set_y, testsentence = datasets[0][2]

    senselist = datasets[1]

    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size
    print n_train_batches, n_valid_batches, n_test_batches

    index = T.lscalar()

    x = T.matrix('x')   
    y = T.ivector('y')

    print '... building the model for '+keyword

    layer0_input = x.reshape((batch_size, 1, 2*window_radius+1, vector_size))

    layer0 = WsdConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, 2*window_radius+1, vector_size),
        filter_shape=(1, 1, filter_height, filter_width),
        poolsize=(pool_height, pool_width)
    )

    layer1_input = layer0.output.flatten(2)
    #layer1_input = layer0_input.flatten(2)
    
    layer1 = HiddenLayer(
        rng,
        input=layer1_input,
        #n_in=(2*window_radius+1)*(vector_size+1-filter_width+1-pool_width),
        n_in=int((2*window_radius+2-filter_height)/float(pool_height))*int((vector_size+1-filter_width)/float(pool_width)),
        n_out=loginput_num,
        activation=T.tanh
    )
    
    layer2 = LogisticRegression(input=layer1_input, n_in=int((2*window_radius+2-filter_height)/float(pool_height))*int((vector_size+1-filter_width)/float(pool_width)), n_out=20)

    cost = layer2.negative_log_likelihood(y)

    test_model = theano.function(
        [index],
        layer2.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer2.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    output_size = theano.function(
        [index],
        [layer0.output.shape],
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size]
        }
    )

    output_model = theano.function(
        [index],
        [layer2.y_pred],
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size]
        }
    )

    output_test = theano.function(
        [index],
        [layer2.y_pred],
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size]
        }
    )

    params = layer2.params + layer0.params

    grads = T.grad(cost, params)

    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    print '... training'
    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    best_params = 0
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                #for index in range(0, n_valid_batches):
                #    print output_model(index)
                #    print valid_set_y[index * batch_size: (index + 1) * batch_size].eval()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    best_params = [copy.deepcopy(layer0.params), copy.deepcopy(layer1.params), copy.deepcopy(layer2.params)]

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in xrange(n_test_batches)
                    ]
                    #print params[0].eval()
                    #print (params[0].eval() == layer2.params[0].eval())
                    #print validation_losses
                    for index in range(0, n_valid_batches):
                        for i in range(0, batch_size):
                            true_i = batch_size*index+i
                            #print output_model(index)
                            print validsentence[true_i], '\t',senselist[output_model(index)[0][i]], '\t', senselist[valid_set_y[true_i].eval()]
                    #print test_losses
                    test_score = numpy.mean(test_losses)
                    for index in range(0, n_test_batches):
                        for i in range(0, batch_size):
                            true_i = batch_size*index+i
                            #print output_model(index)
                            print testsentence[true_i], '\t',senselist[output_test(index)[0][i]], '\t', senselist[test_set_y[true_i].eval()]
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print('Optimization complete.')
    for index in range(0, n_test_batches):
        for i in range(0, batch_size):
            true_i = batch_size*index+i
            #print output_model(index)
            print testsentence[true_i], '\t',senselist[output_test(index)[0][i]], '\t', senselist[test_set_y[true_i].eval()]
    layer0.W = copy.deepcopy(best_params[0][0])
    layer0.b = copy.deepcopy(best_params[0][1])
    #layer0.params = [layer0.W, layer0.b]
    layer1.W = copy.deepcopy(best_params[1][0])
    layer1.b = copy.deepcopy(best_params[1][1])
    #layer1.params = [layer1.W, layer1.b]
    layer2.W = copy.deepcopy(best_params[2][0])
    layer2.b = copy.deepcopy(best_params[2][1])
    #layer2.params = [layer2.W, layer2.b]
    for index in range(0, n_test_batches):
        for i in range(0, batch_size):
            true_i = batch_size*index+i
            #print output_model(index)
            print testsentence[true_i], '\t',senselist[output_test(index)[0][i]], '\t', senselist[test_set_y[true_i].eval()]
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
Example #34
def test_GRBM_DBN(finetune_lr=0.2, pretraining_epochs=1,
             pretrain_lr=0.01, k=1, training_epochs=10,
             dataset='mnist.pkl.gz', batch_size=10, annealing_learning_rate=0.999):
    """
    Demonstrates how to train and test a Deep Belief Network.

    This is demonstrated on MNIST.

    :type finetune_lr: float
    :param finetune_lr: learning rate used in the finetune stage
    :type pretraining_epochs: int
    :param pretraining_epochs: number of epoch to do pretraining
    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training
    :type k: int
    :param k: number of Gibbs steps in CD/PCD
    :type training_epochs: int
    :param training_epochs: maximal number of iterations to run the optimizer
    :type dataset: string
    :param dataset: path to the pickled dataset
    :type batch_size: int
    :param batch_size: the size of a minibatch
    """

    datasets = load_data_grbm(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print '... building the model'
    # construct the Deep Belief Network
    x_skeleton = T.matrix('x')
    dbn = GRBM_DBN(numpy_rng=numpy_rng, n_ins=28 * 28,
                hidden_layers_sizes=[1000, 1000, 1000],
                n_outs=10, finetune_lr=finetune_lr, input=x_skeleton)

    #########################
    # PRETRAINING THE MODEL #
    #########################
    print '... getting the pretraining functions'
    pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size,
                                                k=k)



    # The following part is to get the value for testing
    if False:
        index = T.lscalar()  # index to a [mini]batch
        x = T.matrix('x')  # the data is presented as rasterized images
        y = T.ivector('y')  # the labels are presented as 2D vector of [float32] labels

        dbn = GRBM_DBN(numpy_rng=numpy_rng, n_ins=28 * 28,
                    hidden_layers_sizes=[1000, 1000, 1000],
                    n_outs=10)
        dbn.load('dbn_params.npy')
        #train_fn, validate_model, test_model = dbn.build_finetune_functions(
        #            datasets=datasets, batch_size=batch_size,
        #            learning_rate=finetune_lr)


        valid_score_i = theano.function([index], dbn.errors,
                givens={dbn.x: valid_set_x[index * batch_size:
                                            (index + 1) * batch_size],
                        dbn.y: valid_set_y[index * batch_size:
                                            (index + 1) * batch_size]})

        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
        n_valid_batches /= batch_size

        validation_losses = [valid_score_i(i) for i in xrange(n_valid_batches)]

        validation_losses = valid_score_i()
        this_validation_loss = numpy.mean(validation_losses)



        ## get the actual softmax layer
        temp = theano.function([index],dbn.logLayer.p_y_given_x,
                    givens={dbn.x: valid_set_x[index * batch_size:
                                    (index + 1) * batch_size]})
        temp_out = [temp(i) for i in xrange(n_valid_batches)]
        



    print '... pre-training the model'
    start_time = time.clock()
    ## Pre-train layer-wise
    for i in xrange(dbn.n_layers):
        start_time_temp = time.clock()
        if i==0:
            # For a GRBM, the learning rate needs to be about one or
            # two orders of magnitude smaller than when using
            # binary visible units; some of the failures reported in the
            # literature are probably due to using a learning rate that is too big.
            pretrain_lr_new = pretrain_lr*0.1 
        else:
            pretrain_lr_new = pretrain_lr
        # go through pretraining epochs
        for epoch in xrange(pretraining_epochs):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                                            lr=pretrain_lr_new))
            end_time_temp = time.clock()
            print 'Pre-training layer %i, epoch %d, cost %f ' % (i, epoch, numpy.mean(c)) + ' ran for %d sec' % ((end_time_temp - start_time_temp) )


    end_time = time.clock()
    print >> sys.stderr, ('The pretraining code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

    ########################
    # FINETUNING THE MODEL #
    ########################

    # get the training, validation and testing function for the model
    print '... getting the finetuning functions'
    train_fn, validate_model = dbn.build_finetune_functions(
                datasets=datasets, batch_size=batch_size,
                annealing_learning_rate=annealing_learning_rate)

    print '... finetuning the model'
    # early-stopping parameters
    patience = 4 * n_train_batches  # look as this many examples regardless
    patience_increase = 2.    # wait this much longer when a new best is
                              # found
    improvement_threshold = 0.999 # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                              # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_fn(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                import warnings
                warnings.filterwarnings("ignore")
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if (this_validation_loss < best_validation_loss *
                        improvement_threshold):
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter


                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           this_validation_loss * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%,'
           'with test performance %f %%') %
                 (best_validation_loss * 100., test_score * 100.))
    print >> sys.stderr, ('The fine tuning code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time)
                                              / 60.))

    print dbn.state_learning_rate.get_value()
Example #35
File: mlp.py  Project: mesnilgr/sarco
def test_mlp(learning_rate=0.05,
             L1_reg=0.00,
             L2_reg=0.0001,
             n_epochs=1000,
             split=0,
             batch_size=1,
             n_hidden=[100],
             rot=5,
             seuil=0.25):
    datasets = load_data(split)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(
        borrow=True).shape[0]  #/ batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]  #/ batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.matrix('y')  # the labels are presented as 1D vector of
    # [int] labels

    rng = numpy.random.RandomState(1234)
    shp = train_set_x.get_value().shape[1]
    # construct the MLP class
    classifier = MLP(rng=rng, input=x, n_in=shp, n_hidden=n_hidden, n_out=shp)

    # start-snippet-4
    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 +
            L2_reg * classifier.L2_sqr)
    # end-snippet-4

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    pred_test = theano.function(inputs=[index],
                                outputs=[classifier.y_pred, y],
                                givens={
                                    x: test_set_x[index:(index + 1)],
                                    y: test_set_y[index:(index + 1)]
                                })

    pred_train = theano.function(inputs=[index],
                                 outputs=[classifier.y_pred, y],
                                 givens={
                                     x: train_set_x[index:(index + 1)],
                                     y: train_set_y[index:(index + 1)]
                                 })

    pred_valid = theano.function(inputs=[index],
                                 outputs=[classifier.y_pred, y],
                                 givens={
                                     x: valid_set_x[index:(index + 1)],
                                     y: valid_set_y[index:(index + 1)]
                                 })

    def evaluation(fn, d, ens, epoch, seuil, plot):
        x = d.get_value()
        n_samples = x.shape[0]
        if plot: bigpic = []
        acc = []

        for i in xrange(n_samples):

            pred, true = fn(i)
            pred_mask = pred * (x[i] > 0)

            pred_out = (pred_mask >= seuil).astype(numpy.int)
            true_out = true.astype(numpy.int)
            acc += [jaccard(pred_out, true_out)]

            if plot:
                bigpic += [x[i], pred, pred_mask, pred_out, true_out]

        this_acc = numpy.mean(acc)
        std_acc = numpy.std(acc)

        print('epoch %i, %s error %f +- %f %%' %
              (epoch, ens, this_acc * 100., std_acc * 100.))

        if plot:
            bigpic = numpy.vstack(bigpic)
            tile = tile_raster_images(bigpic, (311, 457),
                                      (n_samples // 4, 5 * 4),
                                      output_pixel_vals=True)
            Im.fromarray(tile).convert("RGB").save("images/" + ens +
                                                   str(epoch) + ".png")

        return this_acc

    gparams = [T.grad(cost, param) for param in classifier.params]
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    n_training_samples = train_set_x.get_value().shape[0]
    print '... training over %i training samples' % n_training_samples

    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 1  # 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
    # go through this many
    # minibatche before checking the network
    # on the validation set; in this case we
    # check every epoch

    best_validation_loss = -numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    evaluation(pred_train, train_set_x, "train", epoch, seuil, True)
    print "training started..."

    while (epoch < n_epochs) and (not done_looping):
        rotate_data((train_set_x, train_set_y), rot)
        epoch = epoch + 1
        minibatch_avg_cost = []
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost += [train_model(minibatch_index)]

            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                print "mean avg cost over training :: ", numpy.mean(
                    minibatch_avg_cost)

                evaluation(pred_train, train_set_x, "train", epoch, seuil,
                           True)
                val = evaluation(pred_valid, valid_set_x, "valid", epoch,
                                 seuil, True)

                # if we got the best validation score until now
                if val > best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if (val > best_validation_loss * improvement_threshold):
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = val
                    best_iter = iter
                    test_score = evaluation(pred_test, test_set_x, "test",
                                            epoch, seuil, True)

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
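
# jaccard() used in evaluation() above is not shown in this snippet; a minimal
# sketch of what such a helper computes (intersection over union of two binary
# masks, an assumption, not necessarily the project's implementation):
def jaccard(pred, true):
    pred = numpy.asarray(pred).astype(bool)
    true = numpy.asarray(true).astype(bool)
    union = numpy.logical_or(pred, true).sum()
    if union == 0:
        return 1.0
    return numpy.logical_and(pred, true).sum() / float(union)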
Example #36
0
	def __init__(self, rng, filter_shape, image_shape, poolsize=2, xin=None):
		
		assert image_shape[1] == filter_shape[1]
		self.image_shape=theano.shared(
			value=np.asarray(image_shape,dtype='int16'),borrow=True)
		self.poolsize=(poolsize,poolsize)
		#self.input = input
		if xin:
			self.x=xin
		else:
			self.x = T.matrix(name='input')
		self.x1=self.x.reshape(self.image_shape,ndim=4)
		self.filter_shape=filter_shape
		
		# there are "num input feature maps * filter height * filter width"
		# inputs to each hidden unit
		fan_in = np.prod(filter_shape[1:])
		# each unit in the lower layer receives a gradient from:
		# "num output feature maps * filter height * filter width" /
		#   pooling size
		fan_out = (filter_shape[0] * np.prod(filter_shape[2:]) /
			np.prod(self.poolsize))
		# initialize weights with random weights
		W_bound = np.sqrt(6. / (fan_in + fan_out))
		self.W = theano.shared(
			np.asarray(
				rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
				dtype=theano.config.floatX
			),
			borrow=True
		)
		self.W_prime=self.W[:,:,::-1,::-1]
		self.W_prime=self.W_prime.dimshuffle(1,0,2,3)
		#self.W_prime=self.W_prime[:,::-1]
		#print self.W.get_value()
		#print self.W_prime.eval()
		# the bias is a 1D tensor -- one bias per output feature map
		b_values = np.zeros((filter_shape[0],), dtype=theano.config.floatX)
		bp_values = np.zeros((filter_shape[1],), dtype=theano.config.floatX)
		self.b = theano.shared(value=b_values, borrow=True)
		self.b_prime = theano.shared(value=bp_values, borrow=True)
		
		if poolsize<-1:
			self.x1=self.x1.repeat(int(-poolsize), axis=2).repeat(int(-poolsize), axis=3)

		# convolve input feature maps with filters
		conv_out = conv2d(
			input=self.x1,
			filters=self.W,
			filter_shape=filter_shape,
			#image_shape=self.image_shape.eval(),
			border_mode='full'
		)
		bp=(filter_shape[2]-1)/2
		
		conv_out=conv_out[:,:,bp:-bp,bp:-bp]
		
		# downsample each feature map individually, using maxpooling
		if poolsize>1:
			try:
				self.pooled_out = pool.pool_2d(
					input=conv_out,
					ws=self.poolsize,
					ignore_border=True
				)
			except:
				
				self.pooled_out = pool.pool_2d(
					input=conv_out,
					ds=self.poolsize,
					ignore_border=True
				)
		else:
			self.pooled_out=conv_out
		
		self.hidden = T.maximum(0,(self.pooled_out + self.b.dimshuffle('x', 0, 'x', 'x')))

		# store parameters of this layer
		self.params = [self.W,self.b]
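
# A minimal usage sketch for the layer above (the class name is not shown in
# this snippet, so "ConvPoolLayer" is a hypothetical stand-in; filter_shape is
# (n_kernels, n_channels, h, w) and image_shape is (batch, n_channels, rows,
# cols), as implied by the assert and reshape in __init__):
#
#   rng = np.random.RandomState(23455)
#   layer = ConvPoolLayer(rng, filter_shape=(16, 1, 5, 5),
#                         image_shape=(20, 1, 64, 64), poolsize=2)
#   f = theano.function([layer.x], layer.hidden)
#   out = f(np.random.rand(20, 64 * 64).astype(theano.config.floatX))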
Example #37
0
    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=[500, 500], n_outs=10, finetune_lr=0.1, input_x=None, label=None):

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        # Wudi: add the mean and standard deviation of the activation values to examine the neural net
        # Reference: Understanding the difficulty of training deep feedforward neural networks, Xavier Glorot, Yoshua Bengio
        self.out_mean = []
        self.out_std = []

        assert self.n_layers > 0
        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        if input_x is None:
            self.x = T.matrix('x')  # the data is presented as rasterized images
        else: 
            self.x = input_x
        if label is None:
            self.y = T.ivector('y')  # the labels are presented as 1D vector
                                     # of [int] labels
        else:
            self.y = label

        for i in xrange(self.n_layers):
            if i == 0:
                input_size = n_ins
                layer_input = self.x
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            self.out_mean.append(T.mean(sigmoid_layer.output))
            self.out_std.append(T.std(sigmoid_layer.output))

            self.params.extend(sigmoid_layer.params)
            # Construct an RBM that shared weights with this layer
            if i == 0:
                rbm_layer = GBRBM(input=layer_input, n_in=input_size, n_hidden=hidden_layers_sizes[i], \
                W=None, hbias=None, vbias=None, numpy_rng=None, transpose=False, activation=T.nnet.sigmoid,
                theano_rng=None, name='grbm', W_r=None, dropout=0, dropconnect=0)
            else:
                rbm_layer = RBM(numpy_rng=numpy_rng,
                                theano_rng=theano_rng,
                                input=layer_input,
                                n_visible=input_size,
                                n_hidden=hidden_layers_sizes[i],
                                W=sigmoid_layer.W,
                                hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)
        self.params.extend(self.logLayer.params)

        # compute the cost for second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)

        #################################################
        # Wudi change the annealing learning rate:
        #################################################
        self.state_learning_rate =  theano.shared(numpy.asarray(finetune_lr,
                                               dtype=theano.config.floatX),
                                               borrow=True)
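
# Sketch (not part of the original snippet): the per-layer activation
# statistics collected in out_mean / out_std above can be compiled into a
# monitoring function; "dbn" is a hypothetical instance of this class and
# "batch" a numpy matrix of rasterized inputs.
#
#   monitor = theano.function([dbn.x], dbn.out_mean + dbn.out_std)
#   stats = monitor(batch)   # per-layer means followed by per-layer stds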
Example #38
0
    def __init__(
            self,
            input_shape,
            output_dim,
            prob_network=None,
            hidden_sizes=(32, 32),
            hidden_nonlinearity=NL.rectify,
            optimizer=None,
            use_trust_region=True,
            step_size=0.01,
            normalize_inputs=True,
            name=None,
    ):
        """
        :param input_shape: Shape of the input data.
        :param output_dim: Dimension of output.
        :param hidden_sizes: Number of hidden units of each layer of the mean network.
        :param hidden_nonlinearity: Non-linearity used for each layer of the mean network.
        :param optimizer: Optimizer for minimizing the negative log-likelihood.
        :param use_trust_region: Whether to use trust region constraint.
        :param step_size: KL divergence constraint for each iteration
        """
        Serializable.quick_init(self, locals())

        if optimizer is None:
            if use_trust_region:
                optimizer = PenaltyLbfgsOptimizer()
            else:
                optimizer = LbfgsOptimizer()

        self.output_dim = output_dim
        self._optimizer = optimizer

        if prob_network is None:
            prob_network = MLP(
                input_shape=input_shape,
                output_dim=output_dim,
                hidden_sizes=hidden_sizes,
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=NL.softmax,
            )

        l_prob = prob_network.output_layer

        LasagnePowered.__init__(self, [l_prob])

        xs_var = prob_network.input_layer.input_var
        ys_var = TT.imatrix("ys")
        old_prob_var = TT.matrix("old_prob")

        x_mean_var = theano.shared(
            np.zeros((1,) + input_shape),
            name="x_mean",
            broadcastable=(True,) + (False, ) * len(input_shape)
        )
        x_std_var = theano.shared(
            np.ones((1,) + input_shape),
            name="x_std",
            broadcastable=(True,) + (False, ) * len(input_shape)
        )

        normalized_xs_var = (xs_var - x_mean_var) / x_std_var

        prob_var = L.get_output(l_prob, {prob_network.input_layer: normalized_xs_var})

        old_info_vars = dict(prob=old_prob_var)
        info_vars = dict(prob=prob_var)

        dist = self._dist = Categorical()

        mean_kl = TT.mean(dist.kl_sym(old_info_vars, info_vars))

        loss = - TT.mean(dist.log_likelihood_sym(ys_var, info_vars))

        predicted = special.to_onehot_sym(TT.argmax(prob_var, axis=1), output_dim)

        self._f_predict = ext.compile_function([xs_var], predicted)
        self._f_prob = ext.compile_function([xs_var], prob_var)
        self._l_prob = l_prob

        optimizer_args = dict(
            loss=loss,
            target=self,
            network_outputs=[prob_var],
        )

        if use_trust_region:
            optimizer_args["leq_constraint"] = (mean_kl, step_size)
            optimizer_args["inputs"] = [xs_var, ys_var, old_prob_var]
        else:
            optimizer_args["inputs"] = [xs_var, ys_var]

        self._optimizer.update_opt(**optimizer_args)

        self._use_trust_region = use_trust_region
        self._name = name

        self._normalize_inputs = normalize_inputs
        self._x_mean_var = x_mean_var
        self._x_std_var = x_std_var
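
    # Sketch (an assumption, not shown in this snippet): with normalize_inputs
    # enabled, the shared normalization statistics would typically be refit
    # from the observed inputs before each optimization, e.g.
    #
    #   self._x_mean_var.set_value(
    #       np.mean(xs, axis=0, keepdims=True).astype(theano.config.floatX))
    #   self._x_std_var.set_value(
    #       (np.std(xs, axis=0, keepdims=True) + 1e-8).astype(theano.config.floatX))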
Example #39
0
    def createGradientFunctions(self):
        # Create the Theano variables
        x_arg1 = T.matrix('x_arg1', dtype='float32')
        x_arg2 = T.matrix('x_arg2', dtype='float32')
        true_class = T.matrix('true_class', dtype='float32')
        eps = T.matrix("eps", dtype='float32')
        Arg1_W1, Arg1_W2, Arg1_W3, Arg1_W4, Arg1_W5, \
        Arg2_W2, Arg2_W3, Arg2_W4, \
        Arg1_W2_prior, Arg1_W3_prior, Arg2_W2_prior, Arg2_W3_prior, \
        Arg1_b1, Arg1_b4, Arg1_b5, Arg2_b4, b2, b3, \
        L_Wc, L_bc, Label_W1, Label_W2, Label_W3, \
        L_W4, Label_b1, L_b4, b2_prior, b3_prior, L_W6, L_b6, L_W7, L_b7, L_W8, L_b8 = self.tparams

        # Parameter Tying
        Arg2_W1 = Arg1_W1
        Arg2_b1 = Arg1_b1
        Arg2_W5 = Arg1_W5
        Arg2_b5 = Arg1_b5

        # Neural Inferencer
        h_arg1_encoder = T.tanh(T.dot(Arg1_W1,x_arg1) + Arg1_b1.dimshuffle(0, 'x'))
        h_arg2_encoder = T.tanh(T.dot(Arg2_W1,x_arg2) + Arg2_b1.dimshuffle(0, 'x'))
        l_encoder = T.tanh(T.dot(Label_W1,true_class) + Label_b1.dimshuffle(0, 'x'))

        mu_poster_encoder = T.dot(Arg1_W2,h_arg1_encoder) + T.dot(Arg2_W2,h_arg2_encoder) \
                + T.dot(Label_W2,l_encoder) + b2.dimshuffle(0, 'x')
        log_sigma_poster_encoder = \
                np.float32(0.5)*(T.dot(Arg1_W3,h_arg1_encoder) + T.dot(Arg2_W3,h_arg2_encoder) \
                + T.dot(Label_W3,l_encoder) + b3.dimshuffle(0, 'x'))

        mu_prior_encoder = T.dot(Arg1_W2_prior,h_arg1_encoder) + T.dot(Arg2_W2_prior,h_arg2_encoder) \
                + b2_prior.dimshuffle(0, 'x')
        log_sigma_prior_encoder = \
                np.float32(0.5)*(T.dot(Arg1_W3_prior,h_arg1_encoder) + T.dot(Arg2_W3_prior,h_arg2_encoder) \
                + b3_prior.dimshuffle(0, 'x'))

        #Find the hidden variable z
        z = mu_poster_encoder + T.exp(log_sigma_poster_encoder)*eps

        prior = T.sum((log_sigma_prior_encoder - log_sigma_poster_encoder) + \
                (T.exp(log_sigma_poster_encoder)**np.float32(2) + \
                (mu_poster_encoder - mu_prior_encoder)**np.float32(2)) /
                (np.float32(2)*(T.exp(log_sigma_prior_encoder)**np.float32(2))) - np.float32(0.5))

        #Neural Generator
        h_arg1_decoder = T.tanh(T.dot(Arg1_W4,z) + Arg1_b4.dimshuffle(0, 'x'))
        h_arg2_decoder = T.tanh(T.dot(Arg2_W4,z) + Arg2_b4.dimshuffle(0, 'x'))
        y_arg1 = T.nnet.sigmoid(T.dot(Arg1_W5,h_arg1_decoder) + Arg1_b5.dimshuffle(0, 'x'))
        y_arg2 = T.nnet.sigmoid(T.dot(Arg2_W5,h_arg2_decoder) + Arg2_b5.dimshuffle(0, 'x'))
        logpxz = -(T.nnet.binary_crossentropy(y_arg1,x_arg1).sum() \
                + T.nnet.binary_crossentropy(y_arg2,x_arg2).sum())

        l_decoder = T.tanh(T.dot(L_W4,z) + L_b4.dimshuffle(0, 'x'))
        l_pred_decoder = T.tanh(T.dot(L_W4, mu_prior_encoder) + L_b4.dimshuffle(0, 'x'))

        l_decoder = T.tanh(T.dot(L_W6,l_decoder) + L_b6.dimshuffle(0, 'x'))
        l_pred_decoder = T.tanh(T.dot(L_W6,l_pred_decoder) + L_b6.dimshuffle(0, 'x'))
        l_decoder = T.tanh(T.dot(L_W7,l_decoder) + L_b7.dimshuffle(0, 'x'))
        l_pred_decoder = T.tanh(T.dot(L_W7,l_pred_decoder) + L_b7.dimshuffle(0, 'x'))
        l_decoder = T.tanh(T.dot(L_W8,l_decoder) + L_b8.dimshuffle(0, 'x'))
        l_pred_decoder = T.tanh(T.dot(L_W8,l_pred_decoder) + L_b8.dimshuffle(0, 'x'))

        pred_class = T.nnet.softmax(T.dot(L_Wc,l_decoder) + L_bc.dimshuffle(0, 'x'))
        logpc = -(T.nnet.categorical_crossentropy(pred_class,true_class).sum())
        pred_level = T.nnet.softmax(T.dot(L_Wc,l_pred_decoder) + L_bc.dimshuffle(0, 'x'))

        logp = - logpxz - logpc + prior

        #Compute all the gradients
        derivatives = T.grad(logp,wrt=self.tparams)

        # apply gradient clipping here
        if self.clip_c > 0.:
            g2 = 0.
            for g in derivatives:
                g2 += (g**2).sum()
            new_grads = []
            for g in derivatives:
                new_grads.append(T.switch(g2 > (self.clip_c**2),
                                           g / T.sqrt(g2) * self.clip_c,
                                           g))
            derivatives = new_grads

        #Add the lowerbound so we can keep track of results
        derivatives.append(logp)

        self.gradientfunction = theano.function([x_arg1,x_arg2,true_class,eps], \
                derivatives, on_unused_input='ignore')
        self.lowerboundfunction = theano.function([x_arg1,x_arg2,true_class,eps], \
                logp, on_unused_input='ignore')
        self.predictionfunction = theano.function([x_arg1,x_arg2], \
                pred_level.T, on_unused_input='ignore')

        #Adam Optimizer
        # This code is adapted from https://github.com/nyu-dl/dl4mt-tutorial/blob/master/session2/nmt.py
        def adam(lr, tparams, grads, inp, cost, beta1=0.9, beta2=0.999, e=1e-8):
            gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k)
               for k, p in zip(self.params_names, tparams)]
            gsup = [(gs, g) for gs, g in zip(gshared, grads)]

            f_grad_shared = theano.function(inp, cost, updates=gsup, profile=False)

            updates = []

            t_prev = theano.shared(np.float32(0.))
            t = t_prev + 1.
            lr_t = lr * T.sqrt(1. - beta2**t) / (1. - beta1**t)

            for p, g in zip(tparams, gshared):
                m = theano.shared(p.get_value() * 0., p.name + '_mean')
                v = theano.shared(p.get_value() * 0., p.name + '_variance')
                m_t = beta1 * m + (1. - beta1) * g
                v_t = beta2 * v + (1. - beta2) * g**2
                step = lr_t * m_t / (T.sqrt(v_t) + e)
                p_t = p - step
                updates.append((m, m_t))
                updates.append((v, v_t))
                updates.append((p, p_t))
            updates.append((t_prev, t))

            f_update = theano.function([lr], [], updates=updates,
                               on_unused_input='ignore', profile=False)

            return f_grad_shared, f_update

        lr = T.scalar(name='lr')
        self.f_grad_shared, self.f_update = \
                adam(lr, self.tparams, derivatives[:-1], [x_arg1,x_arg2,true_class,eps], logp)
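
        # Typical use of the two functions compiled by adam() above (sketch):
        # f_grad_shared evaluates the objective and stores the gradients in the
        # shared slots, f_update then applies one Adam step.
        #
        #   cost = self.f_grad_shared(x1_batch, x2_batch, labels_batch, eps_batch)
        #   self.f_update(learning_rate)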
Example #40
0
File: ops.py Project: saadmahboob/Theano
    def make_node(self, _x):
        x = as_tensor_variable(_x)
        if x.type.ndim != 1:
            raise TypeError('AllocDiag only works on vectors', _x)
        return Apply(self, [x], [tensor.matrix(dtype=x.type.dtype)])
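
# For reference: the Op above allocates a square matrix with its input vector
# on the main diagonal, i.e. the symbolic counterpart of numpy.diag applied to
# a 1-D array. A tiny numpy illustration (not part of the Op itself):
import numpy
print numpy.diag(numpy.array([1., 2., 3.]))
# [[ 1.  0.  0.]
#  [ 0.  2.  0.]
#  [ 0.  0.  3.]]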
Example #41
0
def main():

	usage="""Segment a tomograph using convolutional neural network. Please run this program from the GUI in e2projectmanager.py."""
	#print usage
	parser = EMArgumentParser(usage=usage,version=EMANVERSION)
	#parser.add_header(name="tmpheader", help='temp label', title="### This program is NOT avaliable yet... ###", row=0, col=0, rowspan=1, colspan=2, mode="train,test")
	parser.add_argument("--trainset",help="Training set.", default=None, guitype='filebox', browser="EMParticlesTable(withmodal=True)",  row=1, col=0,rowspan=1, colspan=3, mode="train")
	parser.add_argument("--from_trained", type=str,help="Start from pre-trained neural network", default=None,guitype='filebox',browser="EMBrowserWidget(withmodal=True)", row=2, col=0, rowspan=1, colspan=3, mode="train,test")
	parser.add_argument("--netout", type=str,help="Output neural net file name", default="nnet_save.hdf",guitype='strbox', row=3, col=0, rowspan=1, colspan=3, mode="train")
	
	parser.add_argument("--learnrate", type=float,help="Learning rate ", default=.01, guitype='floatbox', row=4, col=0, rowspan=1, colspan=1, mode="train")
	parser.add_argument("--niter", type=int,help="Training iterations", default=20, guitype='intbox', row=4, col=1, rowspan=1, colspan=1, mode="train")
	parser.add_argument("--ncopy", type=int,help="Number of copies for each particle", default=1, guitype='intbox', row=5, col=0, rowspan=1, colspan=1, mode="train")
	parser.add_argument("--batch", type=int,help="Batch size for the stochastic gradient descent. Default is 20.", default=20, guitype='intbox', row=5, col=1, rowspan=1, colspan=1, mode="train")
	parser.add_argument("--nkernel", type=str,help="Number of kernels for each layer, from input to output. The number of kernels in the last layer must be 1. ", default="40,40,1", guitype='strbox', row=6, col=0, rowspan=1, colspan=1, mode="train")
	parser.add_argument("--ksize", type=str,help="Width of kernels of each layer, the numbers must be odd. Note the number of layers should be the same as the nkernel option. ", default="15,15,15", guitype='strbox', row=6, col=1, rowspan=1, colspan=1, mode="train")
	parser.add_argument("--poolsz", type=str,help="Pooling size for each layer. Note the number of layers should be the same as the nkernel option. ", default="2,1,1", guitype='strbox', row=7, col=0, rowspan=1, colspan=1, mode="train")
	parser.add_argument("--weightdecay", type=float,help="Weight decay. Used for regularization.", default=1e-6, guitype='floatbox', row=7, col=1, rowspan=1, colspan=1, mode="train")
	parser.add_argument("--trainout", action="store_true", default=False ,help="Output the result of the training set", guitype='boolbox', row=8, col=0, rowspan=1, colspan=1, mode='train[True]')
	parser.add_argument("--training", action="store_true", default=False ,help="Doing training", guitype='boolbox', row=8, col=1, rowspan=1, colspan=1, mode='train[True]')
	parser.add_argument("--tomograms", type=str,help="Tomograms input.", default=None,guitype='filebox',browser="EMBrowserWidget(withmodal=True)", row=1, col=0, rowspan=1, colspan=3, mode="test")
	parser.add_argument("--applying", action="store_true", default=False ,help="Applying the neural network on tomograms", guitype='boolbox', row=4, col=0, rowspan=1, colspan=1, mode='test[True]')
	parser.add_argument("--dream", action="store_true", default=False ,help="Iterativly applying the neural network on noise")
	parser.add_argument("--to3d", action="store_true", default=True ,help="convert to result to 3D.", guitype='boolbox', row=5, col=1, rowspan=1, colspan=1, mode='test')
	parser.add_argument("--output", type=str,help="Segmentation out file name", default="tomosegresult.hdf", guitype='strbox', row=3, col=0, rowspan=1, colspan=1, mode="test")
	parser.add_argument("--threads", type=int,help="Number of thread to use when applying neural net on test images. Not used during trainning", default=12, guitype='intbox', row=10, col=0, rowspan=1, colspan=1, mode="test")
	parser.add_argument("--ppid", type=int, help="Set the PID of the parent process, used for cross platform PPID",default=-1)

	(options, args) = parser.parse_args()
	E2n=E2init(sys.argv,options.ppid)
	
	#### parse the options.
	options.nkernel=[int(i) for i in options.nkernel.split(',')]
	options.ksize=[int(i) for i in options.ksize.split(',')]
	if options.poolsz:
		options.poolsz=[int(i) for i in options.poolsz.split(',')]
	
	#### This is supposed to test the overfitting of the network by applying it on pure noise repeatedly.
	#### The function is no longer maintained, so it may or may not work.
	if options.dream:
		print("This function is no longer supported.. exit.")
		return
		#os.environ["THEANO_FLAGS"]="optimizer=None"
		#print "Testing on big images, Theano optimizer disabled"
		#import_theano()
		#convnet=load_model(options.from_trained)
		#dream(convnet,options)
		#E2end(E2n)
		#exit()
		
	
	if options.applying:
		apply_neuralnet(options)
		E2end(E2n)
		exit()
	
	
	os.environ["THEANO_FLAGS"]="optimizer=fast_run"
	import_theano()

	
	batch_size=options.batch
	#### Train da with particles first.
	
	if options.trainset==None:
		print("No training set input...exit.")
		exit()
	
	
	rng = np.random.RandomState(123)

	labelshrink=np.prod(options.poolsz)
	print("loading particles...")
	particles=load_particles(options.trainset,labelshrink,options.ncopy, rng)

	train_set_x= particles[0]
	labels=particles[1]
	shape=particles[2]
	ntrain=particles[3]
	#print "Number of particles: {}".format(train_set_x.shape.eval()[0])
	
	# allocate symbolic variables for the data
	index = T.lscalar()	# index to a [mini]batch
	x = T.matrix('x')  # the data is presented as rasterized images
	image_shape=(batch_size, shape[2], shape[0],shape[1])
	
	if options.from_trained!=None:
		convnet=load_model(options.from_trained)
		convnet.update_shape(image_shape)
	else:
		print("setting up model")
		convnet = StackedConvNet(
			rng,
			nkernel=options.nkernel,
			ksize=options.ksize,
			poolsz=options.poolsz,
			imageshape=image_shape
		)
	
	
	
	#print shape
	
	
	if (options.niter>0):	
		print("training the convolutional network...")
		
		classify=convnet.get_classify_func(train_set_x,labels,batch_size)
			
		learning_rate=options.learnrate
		n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
		v0=np.inf
		nbad=0
		for epoch in xrange(options.niter):
		# go through the training set
			
			c = []   #### train set loss
			v = []   #### valid set loss
			if epoch==0:
				print(classify(0,lr=learning_rate,wd=options.weightdecay))
			for batch_index in xrange(n_train_batches):
				if batch_index*batch_size < ntrain:
					err=classify(batch_index,
						lr=learning_rate,
						wd=options.weightdecay)
					c.append(err)
					if epoch==0 and batch_index<5:
						print(err)
				else:
					err=classify(batch_index,
						lr=0,
						wd=options.weightdecay)
					v.append(err)
					
			#print len(v), len(c)
			learning_rate*=.9
			print("Training epoch {:d}, train loss {:.3f}, learning rate {:.3f}".format(epoch, np.mean(c),  learning_rate), end=' ')
			if len(v)>0:
				print("valid loss {:.3f}".format(np.mean(v)), end=' ') 
				if np.mean(v)>v0 and np.mean(v)>np.mean(c):
					nbad+=1
					print('*')
				else:
					nbad=0
					print() 
				v0=np.mean(v)
				if nbad>2:
					print("loss increase in validation set. Overfitting. Stop.")
					break
			else:
				print() 

		
		
		
	#######################################
	#print convnet.clslayer.W.get_value()
	#print convnet.clslayer.b.get_value()
			
	if options.trainout:
		print("Generating results ...")
		nsample=100
		convnet.update_shape((nsample, shape[2], shape[0],shape[1]))
		test_cls = theano.function(
			inputs=[],
			outputs=convnet.clslayer.get_image(False),
			givens={
				convnet.x: train_set_x[:nsample]
			}
		)
		if options.netout.endswith(".hdf"):
			fname="trainout_{}".format(options.netout)
		else:
			fname="trainout_{}.hdf".format(options.netout)
		try:os.remove(fname)
		except: pass
		#print convnet.outsize,shape
		mid=test_cls()
		
		ipt= train_set_x[:nsample]
		ipt= ipt.eval()
		
		lb= labels[:nsample].eval()
		amp=[]
		for t in range(nsample):
			
			#### raw image
			if shape[2]==1:
				img=ipt[t].reshape(shape[0],shape[1])
			else:
				img=ipt[t].reshape(shape[2],shape[0],shape[1])
				img=img[shape[2]/2]
			e0 = from_numpy(img.astype("float32"))
			e0.write_image(fname,-1)
			
			#### manual annotation
			img=lb[t].reshape(convnet.outsize,convnet.outsize)
			e1 = from_numpy(img.astype("float32"))
			e1=e1.get_clip(Region((convnet.outsize-shape[0])/2,(convnet.outsize-shape[0])/2,shape[0],shape[0]))
			e1.scale(float(shape[0])/float(convnet.outsize))
			e1.process_inplace("threshold.binary", {"value":.67})
			e1.write_image(fname,-1)
			
			#### neural net output
			img=mid[t].reshape(convnet.outsize,convnet.outsize)
			e2 = from_numpy(img.astype("float32"))
			e2=e2.get_clip(Region((convnet.outsize-shape[0])/2,(convnet.outsize-shape[0])/2,shape[0],shape[0]))
			#print float(shape[0])/float(convnet.outsize)
			e2.scale(float(shape[0])/float(convnet.outsize))
			e2.write_image(fname,-1)
			
			#### measure the amplitude of the neural network output by comparing it to the label
			e2.mult(e1)
			amp.append(e2["mean_nonzero"])
		print("amplitude: ", np.mean(amp))
		convnet.amplitude=np.mean(amp)
		print("Writing output on training set in {}".format(fname))
		
	save_model(convnet, options.netout, options)
	
	print("Done")
	E2end(E2n)
Example #42
0
def build_model(tparams, model_options):
    x = T.matrix('x', dtype='float32')
    start_temperature = T.scalar('start_temperature', dtype='float32')
    num_step = T.scalar('num_step', dtype='int32')
    loss = compute_loss(x, model_options, tparams, start_temperature, num_step)
    return x, loss, start_temperature, num_step
Example #43
0
    def __init__(self,N_tot,D,Q,Domain_number,Ydim,Hiddenlayerdim1,Hiddenlayerdim2,num_MC,n_rff):
        ########################################
        # set up the BC X (this should also be turned into a layer later)
        self.Xlabel=T.matrix('Xlabel')

        
        self.X=T.matrix('X')
        self.Y=T.matrix('Y')
        N=self.X.shape[0]
        
        self.Weight=T.matrix('Weight')
        
        
        ########################
        # build the hidden layers
        self.Data_input=T.tile(self.X,(num_MC,1,1))
        
        ##########################################
        #### inference on the X side
        
        self.RFF_X=RFFLayer(rng, self.Data_input, n_in=D, n_out=Q, num_MC=num_MC,num_FF=n_rff,Domain_number=Domain_number,number="X",Domain_consideration=True)
        
        self.params = self.RFF_X.all_params
        self.hyp_params=self.RFF_X.hyp_params
        self.variational_params=self.RFF_X.variational_params
        ##############################################################################################
        ### computation on the Y side
        self.RFF_Y=RFFLayer(rng, self.RFF_X.output, n_in=Q, n_out=Ydim, num_MC=num_MC,num_FF=n_rff,number="Y",Domain_consideration=False)
   
        self.params.extend(self.RFF_Y.all_params)
        self.hyp_params.append(self.RFF_Y.lhyp)
        self.variational_params.extend(self.RFF_Y.variational_params)
        
        ##########################################
        # store the parameters
        #self.no_updates=self.RFF_X.no_update
        self.wrt={}
        for i in self.params:
            self.wrt[str(i)]=i
        
        ###########################################
        ### objective function
        ############# X side
        
        #self.LL_X = self.RFF_X.likelihood_domain(self.X,self.Xlabel)*N_tot/(N*num_MC)
        self.KL_WX = self.RFF_X.KL_W        
        
        ############# Y side
        self.LL_Y =  self.RFF_Y.liklihood_nodomain(self.Y)*N_tot/(N*num_MC)
        self.KL_WY = self.RFF_Y.KL_W
        #y=self.Gaussian_layer_Y.softmax_class()
        #self.LLY = -T.mean(T.nnet.categorical_crossentropy(y, self.Y))*N
        ############# middle (latent) layer and prediction
        
        #self.error = self.RFF_Y.error_RMSE(self.Y)
        
        pred = T.mean(self.RFF_Y.output,0)
        self.error = (T.mean((self.Y - pred)**2,0))**0.5
        #mu=T.mean(target,0)
        #self.error= (T.mean(T.mean((self.Y[None,:,:] - self.RFF_Y.output)**2,0)))**0.5
        
        ###########################################
        self.MMD=self.RFF_Y.MMD_central_penalty(self.Xlabel)*N_tot
Example #44
0
def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
                    nkerns=[48, 64, 96], batch_size=500):
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training /testing (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer (so the default would be 20 kernels in the first layer and 50 in the second?)
    """

    rng = numpy.random.RandomState(23455)

    '''
             original load_data call:
    datasets = load_data(dataset)
    '''
    
    datasets = loadData()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size
    print(n_train_batches)
    print(n_valid_batches)
    if n_test_batches == 0:
        n_test_batches = 1

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ishape = (50, 50)  # this is the size of the input images

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape matrix of rasterized images of shape (batch_size, 50*50)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, 50, 50))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (50-5+1, 50-5+1) = (46, 46)
    # maxpooling reduces this further to (46/2, 46/2) = (23, 23)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 23, 23)
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
            image_shape=(batch_size, 1, 50, 50),
            filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2))

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (23-3+1, 23-3+1) = (21, 21)
    # maxpooling (ignore_border) reduces this further to (21/2, 21/2) = (10, 10)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 10, 10)
    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
            image_shape=(batch_size, nkerns[0], 23, 23),
            filter_shape=(nkerns[1], nkerns[0], 3, 3), poolsize=(2, 2))
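
    # Construct a third convolutional pooling layer
    # filtering reduces the image size to (10-3+1, 10-3+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[2], 4, 4)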
    
    layer1_3 = LeNetConvPoolLayer(rng, input=layer1.output,
            image_shape=(batch_size, nkerns[1], 10, 10),
            filter_shape=(nkerns[2], nkerns[1], 3, 3), poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[2]*4*4) = (500, 1536)
    layer2_input = layer1_3.output.flatten(2)

    # construct a fully-connected tanh layer
    layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[2] * 4 * 4,
                         n_out=1000, activation=T.tanh)

    # classify the values of the fully-connected tanh layer
    layer3 = LogisticRegression(input=layer2.output, n_in=1000, n_out=58)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function([index], layer3.errors(y),
             givens={
                x: test_set_x[index * batch_size: (index + 1) * batch_size],
                y: test_set_y[index * batch_size: (index + 1) * batch_size]})

    validate_model = theano.function([index], layer3.errors(y),
            givens={
                x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                y: valid_set_y[index * batch_size: (index + 1) * batch_size]})

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1_3.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i],grads[i]) pairs.
    updates = []
    for param_i, grad_i in zip(params, grads):
        updates.append((param_i, param_i - learning_rate * grad_i))

    train_model = theano.function([index], cost, updates=updates,
          givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]})

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 1000000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatche before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                      (epoch, minibatch_index + 1, n_train_batches, \
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of best '
                           'model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))
                    best_params = params

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    saveParams(params)
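
# saveParams() is called above but not defined in this snippet; a minimal
# sketch of such a helper (an assumption, not the project's implementation):
import cPickle

def saveParams(params, path='best_params.pkl'):
    # dump the raw numpy values of every shared parameter
    cPickle.dump([p.get_value(borrow=True) for p in params],
                 open(path, 'wb'), protocol=cPickle.HIGHEST_PROTOCOL)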
Example #45
0
def train(args,
          model_args):

    #model_id = '/data/lisatmp4/lambalex/lsun_walkback/walkback_'

    model_id = '/data/lisatmp4/anirudhg/cifar_walk_back/walkback_'
    model_dir = create_log_dir(args, model_id)
    model_id2 =  'logs/walkback_'
    model_dir2 = create_log_dir(args, model_id2)
    print model_dir
    print model_dir2 + '/' + 'log.jsonl.gz'
    logger = mimir.Logger(filename=model_dir2  + '/log.jsonl.gz', formatter=None)

    # TODO batches_per_epoch should not be hard coded
    lrate = args.lr
    import sys
    sys.setrecursionlimit(10000000)
    args, model_args = parse_args()

    #trng = RandomStreams(1234)

    if args.resume_file is not None:
        print "Resuming training from " + args.resume_file
        from blocks.scripts import continue_training
        continue_training(args.resume_file)

    ## load the training data
    if args.dataset == 'MNIST':
        print 'loading MNIST'
        from fuel.datasets import MNIST
        dataset_train = MNIST(['train'], sources=('features',))
        dataset_test = MNIST(['test'], sources=('features',))
        n_colors = 1
        spatial_width = 28

    elif args.dataset == 'CIFAR10':
        from fuel.datasets import CIFAR10
        dataset_train = CIFAR10(['train'], sources=('features',))
        dataset_test = CIFAR10(['test'], sources=('features',))
        n_colors = 3
        spatial_width = 32

    elif args.dataset == "lsun" or args.dataset == "lsunsmall":

        print "loading lsun class!"

        from load_lsun import load_lsun

        print "loading lsun data!"

        if args.dataset == "lsunsmall":
            dataset_train, dataset_test = load_lsun(args.batch_size, downsample=True)
            spatial_width=32
        else:
            dataset_train, dataset_test = load_lsun(args.batch_size, downsample=False)
            spatial_width=64

        n_colors = 3


    elif args.dataset == "celeba":

        print "loading celeba data"

        from fuel.datasets.celeba import CelebA

        dataset_train = CelebA(which_sets = ['train'], which_format="64", sources=('features',), load_in_memory=False)
        dataset_test = CelebA(which_sets = ['test'], which_format="64", sources=('features',), load_in_memory=False)

        spatial_width = 64
        n_colors = 3

        tr_scheme = SequentialScheme(examples=dataset_train.num_examples, batch_size=args.batch_size)
        ts_scheme = SequentialScheme(examples=dataset_test.num_examples, batch_size=args.batch_size)

        train_stream = DataStream.default_stream(dataset_train, iteration_scheme = tr_scheme)
        test_stream = DataStream.default_stream(dataset_test, iteration_scheme = ts_scheme)

        dataset_train = train_stream
        dataset_test = test_stream

        #epoch_it = train_stream.get_epoch_iterator()

    elif args.dataset == 'Spiral':
        print 'loading SPIRAL'
        train_set = Spiral(num_examples=100000, classes=1, cycles=2., noise=0.01,
                           sources=('features',))
        dataset_train = DataStream.default_stream(train_set,
                            iteration_scheme=ShuffledScheme(
                            train_set.num_examples, args.batch_size))

    else:
        raise ValueError("Unknown dataset %s."%args.dataset)

    model_options = locals().copy()

    if args.dataset != 'lsun' and args.dataset != 'celeba':
        train_stream = Flatten(DataStream.default_stream(dataset_train,
                              iteration_scheme=ShuffledScheme(
                                  examples=dataset_train.num_examples - (dataset_train.num_examples%args.batch_size),
                                  batch_size=args.batch_size)))
    else:
        train_stream = dataset_train
        test_stream = dataset_test

    print "Width", WIDTH, spatial_width

    shp = next(train_stream.get_epoch_iterator())[0].shape

    print "got epoch iterator"

    Xbatch = next(train_stream.get_epoch_iterator())[0]
    scl = 1./np.sqrt(np.mean((Xbatch-np.mean(Xbatch))**2))
    shft = -np.mean(Xbatch*scl)

    print 'Building model'
    params = init_params(model_options)
    if args.reload_:
        print "Trying to reload parameters"
        if os.path.exists(args.saveto_filename):
            print 'Reloading Parameters'
            print args.saveto_filename
            params = load_params(args.saveto_filename, params)
    tparams = init_tparams(params)
    print tparams
    x, cost, start_temperature, step_chain = build_model(tparams, model_options)
    inps = [x.astype('float32'), start_temperature, step_chain]

    x_Data = T.matrix('x_Data', dtype='float32')
    temperature  = T.scalar('temperature', dtype='float32')
    step_chain_part  = T.scalar('step_chain_part', dtype='int32')

    forward_diffusion = one_step_diffusion(x_Data, model_options, tparams, temperature, step_chain_part)

    print tparams
    grads = T.grad(cost, wrt=itemlist(tparams))

    #get_grads = theano.function(inps, grads)

    for j in range(0, len(grads)):
        grads[j] = T.switch(T.isnan(grads[j]), T.zeros_like(grads[j]), grads[j])


    # compile the optimizer, the actual computational graph is compiled here
    lr = T.scalar(name='lr')
    print 'Building optimizers...',
    optimizer = args.optimizer

    f_grad_shared, f_update = getattr(optimizers, optimizer)(lr, tparams, grads, inps, cost)
    print 'Done'

    #for param in tparams:
    #    print param
    #    print tparams[param].get_value().shape

    print 'Building Sampler....'
    f_sample = sample(tparams, model_options)
    print 'Done'

    uidx = 0
    estop = False
    bad_counter = 0
    max_epochs = 4000
    batch_index = 1
    print  'Number of steps....'
    print args.num_steps
    print "Number of metasteps...."
    print args.meta_steps
    print 'Done'
    count_sample = 1
    for eidx in xrange(max_epochs):
        n_samples = 0
        print 'Starting Next Epoch ', eidx
        for data in train_stream.get_epoch_iterator():
            if args.dataset == 'CIFAR10':
                if data[0].shape[0] == args.batch_size:
                    data_use = (data[0].reshape(args.batch_size,3*32*32),)
                else:
                    continue
            t0 = time.time()
            batch_index += 1
            n_samples += len(data_use[0])
            uidx += 1
            if data_use[0] is None:
                print 'No data '
                uidx -= 1
                continue
            ud_start = time.time()

            t1 = time.time()

            data_run = data_use[0]
            temperature_forward = args.temperature
            meta_cost = []
            for meta_step in range(0, args.meta_steps):
                data_run = data_run.astype('float32')
                meta_cost.append(f_grad_shared(data_run, temperature_forward, meta_step))
                f_update(lrate)
                if args.meta_steps > 1:
                    data_run, sigma, _, _ = forward_diffusion(data_run, temperature_forward, meta_step)
                    temperature_forward *= args.temperature_factor
            cost = sum(meta_cost) / len(meta_cost)
            ud = time.time() - ud_start

            #gradient_updates_ = get_grads(data_use[0],args.temperature)
            if np.isnan(cost) or np.isinf(cost):
                print 'NaN detected'
                return 1.
            logger.log({'epoch': eidx,
                        'batch_index': batch_index,
                        'uidx': uidx,
                        'training_error': cost})

            if batch_index%20==0:
                print batch_index, "cost", cost

            if batch_index%1000==0:
                print 'saving params'
                params = unzip(tparams)
                save_params(params, model_dir + '/' + 'params_' + str(batch_index) + '.npz')

            if batch_index%200==0:
                count_sample += 1
                '''
                temperature = args.temperature * (args.temperature_factor ** (args.num_steps*args.meta_steps -1 ))
                temperature_forward = args.temperature
                for num_step in range(args.num_steps * args.meta_steps):
                    print "Forward temperature", temperature_forward
                    if num_step == 0:
                        x_data, sampled, sampled_activation, sampled_preactivation = forward_diffusion(data[0].astype('float32'), temperature_forward, num_step)
                        x_data = np.asarray(x_data).astype('float32').reshape(args.batch_size, INPUT_SIZE)
                        x_temp = x_data.reshape(args.batch_size, n_colors, WIDTH, WIDTH)
                        plot_images(x_temp, model_dir + '/' + "batch_" + str(batch_index) + '_corrupted' + 'epoch_' + str(count_sample) + '_time_step_' + str(num_step))
                    else:
                        x_data, sampled, sampled_activation, sampled_preactivation = forward_diffusion(x_data.astype('float32'), temperature_forward, num_step)
                        x_data = np.asarray(x_data).astype('float32').reshape(args.batch_size, INPUT_SIZE)
                        x_temp = x_data.reshape(args.batch_size, n_colors, WIDTH, WIDTH)
                        plot_images(x_temp, model_dir + '/batch_' + str(batch_index) + '_corrupted' + '_epoch_' + str(count_sample) + '_time_step_' + str(num_step))

                    temperature_forward = temperature_forward * args.temperature_factor;
                x_temp2 = data_use[0].reshape(args.batch_size, n_colors, WIDTH, WIDTH)
                plot_images(x_temp2, model_dir + '/' + 'orig_' + 'epoch_' + str(eidx) + '_batch_index_' +  str(batch_index))

                temperature = args.temperature * (args.temperature_factor ** (args.num_steps*args.meta_steps - 1 ))
                for i in range(args.num_steps*args.meta_steps + args.extra_steps):
                    x_data, sampled, sampled_activation, sampled_preactivation  = f_sample(x_data.astype('float32'), temperature, args.num_steps*args.meta_steps -i - 1)
                    print 'On backward step number, using temperature', i, temperature
                    reverse_time(scl, shft, x_data, model_dir + '/'+ "batch_" + str(batch_index) + '_samples_backward_' + 'epoch_' + str(count_sample) + '_time_step_' + str(i))
                    x_data = np.asarray(x_data).astype('float32')
                    x_data = x_data.reshape(args.batch_size, INPUT_SIZE)
                    if temperature == args.temperature:
                        temperature = temperature
                    else:
                        temperature /= args.temperature_factor
                '''

                if args.noise == "gaussian":
                    x_sampled = np.random.normal(0.5, 2.0, size=(args.batch_size,INPUT_SIZE)).clip(0.0, 1.0)
                else:
                    x_sampled = np.random.binomial(1, 0.5, size=(args.batch_size, INPUT_SIZE))

                temperature = args.temperature * (args.temperature_factor ** (args.num_steps*args.meta_steps - 1))
                x_data = np.asarray(x_sampled).astype('float32')
                for i in range(args.num_steps*args.meta_steps + args.extra_steps):
                    x_data,  sampled, sampled_activation, sampled_preactivation = f_sample(x_data.astype('float32'), temperature, args.num_steps*args.meta_steps -i - 1)
                    print 'On step number, using temperature', i, temperature
                    reverse_time(scl, shft, x_data, model_dir + '/batch_index_' + str(batch_index) + '_inference_' + 'epoch_' + str(count_sample) + '_step_' + str(i))
                    x_data = np.asarray(x_data).astype('float32')
                    x_data = x_data.reshape(args.batch_size, INPUT_SIZE)
                    if temperature == args.temperature:
                        temperature = temperature
                    else:
                        temperature /= args.temperature_factor

    ipdb.set_trace()
Example #46
0
File: nnet.py Project: Daniel6/doml
import theano
import theano.tensor as T
from load import mnist
import numpy as np
from costs import categorical_crossentropy
from updates import Adadelta
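
# floatX is used below but not imported above; presumably a small cast helper
# along these lines (an assumption):
def floatX(X):
	return np.asarray(X, dtype=theano.config.floatX)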

batch_size = 128
X = T.matrix()
Y = T.matrix()
n_in = 28*28
n_hidden = 512
n_out = 10

w_in = theano.shared(floatX(np.random.randn(n_in,n_hidden)*0.01))
w_out = theano.shared(floatX(np.random.randn(n_hidden,n_out)*0.01))
b_in = theano.shared(floatX(np.zeros((n_hidden))))
b_out = theano.shared(floatX(np.zeros((n_out))))

def model(X):
	h = T.tanh(T.dot(X,w_in)+b_in)
	y = T.nnet.softmax(T.dot(h,w_out)+b_out)
	return y

out = model(X)
err = categorical_crossentropy(Y,out)
params = [w_in,b_in,w_out,b_out]
grads = T.grad(err,params)
updates = Adadelta(params,grads)

train = theano.function([X,Y],err,updates=updates)
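
# A minimal training-loop sketch for the model above. It assumes the project's
# mnist() loader returns (trX, teX, trY, teY) with one-hot labels; the exact
# signature is not shown in this snippet.
trX, teX, trY, teY = mnist(onehot=True)
for epoch in range(10):
	for start in range(0, len(trX), batch_size):
		cost = train(trX[start:start + batch_size], trY[start:start + batch_size])
	print epoch, cost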
Example #47
0
def main():
    parser = build_parser()
    args = parser.parse_args()

    np.random.seed(args.seed)
    trng = RandomStreams(args.seed)
    rng = np.random.RandomState(args.seed + 1)
    model_file = args.model_prefix + "_pars.npz"
    model_opts = args.model_prefix + "_opts.pkl"
    model_options = pkl.load(open(model_opts, 'rb'))

    # Load data
    data = IMDB_JMARS("./experiments/data",
                      seq_len=16,
                      batch_size=args.nb_samples,
                      topk=16000)
    model_options["dim_input"] = data.voc_size

    for num, (x, y, x_mask) in enumerate(data.get_valid_batch()):
        data.print_batch(x)
        break

    params = init_params(model_options)
    print('Loading model parameters...')
    params = load_params(model_file, params)
    tparams = init_tparams(params)

    x = T.lmatrix('x')
    y = T.lmatrix('y')
    x_mask = T.matrix('x_mask')
    # Debug test_value
    x.tag.test_value = np.random.rand(11, 20).astype("int64")
    y.tag.test_value = np.random.rand(11, 20).astype("int64")
    x_mask.tag.test_value = np.ones((11, 20)).astype("float32")
    is_train.tag.test_value = np.float32(0.)

    zmuv = T.tensor3('zmuv')
    zmuv.tag.test_value = np.ones(
        (11, 20, model_options['dim_z'])).astype("float32")

    # build the symbolic computational graph
    nll_rev, states_rev, updates_rev = \
        build_rev_model(tparams, model_options, x, y, x_mask)
    nll_gen, states_gen, kld, rec_cost_rev, updates_gen, \
        log_pxIz, log_pz, log_qzIx, z, _ = \
        build_gen_model(tparams, model_options, x, y, x_mask, zmuv, states_rev)
    # Build sampler
    f_next = build_sampler(tparams, model_options, trng, provide_z=True)
    # Build inference
    get_latents = theano.function([x, y, x_mask, zmuv],
                                  z,
                                  updates=(updates_gen + updates_rev),
                                  givens={is_train: np.float32(0.)})
    while True:
        s1 = raw_input("s1:").strip().split()
        s2 = raw_input("s2:").strip().split()

        s1_id = [data.word2idx.get(word, data.unk_id) for word in s1]
        s2_id = [data.word2idx.get(word, data.unk_id) for word in s2]

        batch = data.prepare_batch([s1_id, s2_id])
        data.print_batch(batch[0])

        zmuv = rng.normal(loc=0.0,
                          scale=1.0,
                          size=(batch[0].shape[1], 2,
                                model_options['dim_z'])).astype('float32')
        batch_z = get_latents(batch[0].T, batch[1].T, batch[2].T, zmuv)
        z1 = batch_z[:, [0], :]
        z2 = batch_z[:, [1], :]
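
        # The loops below interpolate linearly between the latent codes of s1
        # (z1) and s2 (z2) in 11 steps and decode each mixture three ways:
        # beam search, multinomial sampling, and argmax decoding.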

        print("Beam Search")
        data.print_batch(batch[0][[0]], eos_id=data.eos_id, print_number=False)
        for i in np.linspace(0, 1, 11):
            print("{}: ".format(i), end="")
            z = ((1 - i) * z1) + (i * z2)  # Interpolate latent
            z = np.repeat(z, 10, axis=1)
            sample, sample_score = beam_sample(tparams,
                                               f_next,
                                               model_options,
                                               maxlen=20,
                                               zmuv=z,
                                               unk_id=data.unk_id,
                                               eos_id=data.eos_id,
                                               bos_id=data.bos_id)
            sample = [sample[0]]
            data.print_batch(sample, eos_id=data.eos_id, print_number=False)

        data.print_batch(batch[0][[1]], eos_id=data.eos_id, print_number=False)

        # Interpolation
        print("Samples")
        data.print_batch(batch[0][[0]], eos_id=data.eos_id, print_number=False)
        for i in np.linspace(0, 1, 11):
            print("{}: ".format(i), end="")
            z = ((1 - i) * z1) + (i * z2)  # Interpolate latent
            z = np.repeat(z, 10, axis=1)
            sample, sample_score = gen_sample(tparams,
                                              f_next,
                                              model_options,
                                              maxlen=20,
                                              argmax=False,
                                              zmuv=z,
                                              unk_id=data.unk_id,
                                              eos_id=data.eos_id,
                                              bos_id=data.bos_id)
            sample = [sample.T[np.argsort(sample_score)[-1]]]
            data.print_batch(sample, eos_id=data.eos_id, print_number=False)

        data.print_batch(batch[0][[1]], eos_id=data.eos_id, print_number=False)

        print("Argmax")
        data.print_batch(batch[0][[0]], eos_id=data.eos_id, print_number=False)
        for i in np.linspace(0, 1, 11):
            print("{}: ".format(i), end="")
            z = ((1 - i) * z1) + (i * z2)  # Interpolate latent
            sample, sample_score = gen_sample(tparams,
                                              f_next,
                                              model_options,
                                              maxlen=20,
                                              argmax=True,
                                              zmuv=z,
                                              unk_id=data.unk_id,
                                              eos_id=data.eos_id,
                                              bos_id=data.bos_id)
            data.print_batch(sample.T, eos_id=data.eos_id, print_number=False)
        data.print_batch(batch[0][[1]], eos_id=data.eos_id, print_number=False)
        raw_input("-- Next --")
        sys.exit(0)
Example #48
0
        cost = T.mean((self.x - self.z)**2)
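        # Plain gradient-descent updates: each parameter moves against its
        # gradient, scaled by learning_rate.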
        gparams = T.grad(cost, self.params)
        updates = [(param, param - learning_rate * gparam)
                   for param, gparam in zip(self.params, gparams)]
        #         updates = gradient_updates_momentum(cost, self.params)

        return (cost, updates)


print 'nTrials x nFeatures ', np.shape(X_eeg)
print 'Target vector ', np.shape(y_eeg)
print 'Total number of subjects: ', subject_count
""" Generate symbolic variables for input (X and y
represent a minibatch)
"""
X = T.matrix('X')  # 2100 x 60 data
y = T.vector('y')  # labels, presented as 1D vector of [int] labels
""" Construct the logistic regression class """
rng = np.random.RandomState(1234)
n_hidden = 50
n_visible = np.shape(X_eeg)[1]
da = dA(numpy_rng=rng, input=X, n_visible=n_visible, n_hidden=n_hidden)

cost, updates = da.get_cost_updates(corruption_level=0.2, learning_rate=0.01)

train = theano.function(inputs=[X],
                        outputs=cost,
                        updates=updates,
                        allow_input_downcast=True)
predict = theano.function(inputs=[X], outputs=da.z)
""" Leave One Out """
Example #49
0
sys.path.insert(1, os.path.join(base_path, '../../common'))
sys.path.insert(2, os.path.join(base_path, '../../database'))
sys.path.insert(1, os.path.join(base_path, '../'))

from db import DB
from project import Project
from performance import Performance
from data import Data

from cnn import CNN

if __name__ == '__main__':

    # load the model to use for performance evaluation

    x = T.matrix('x')

    rng = numpy.random.RandomState(1234)

    # retrieve the project settings from the database
    project = DB.getProject('evalcnn')

    # create the model based on the project
    model = CNN(rng=rng,
                input=x,
                offline=True,
                batch_size=project.batchSize,
                patch_size=project.patchSize,
                nkerns=project.nKernels,
                kernel_sizes=project.kernelSizes,
                hidden_sizes=project.hiddenUnits,
Example #50
0
def make_matrix():
    """
    Returns a new Theano matrix.
    """

    return T.matrix()
Example #51
0
def train_conv_net(datasets,
                   U,
                   img_w=300,
                   filter_hs=[3, 4, 5],
                   hidden_units=[100, 2],
                   dropout_rate=[0.5],
                   shuffle_batch=True,
                   n_epochs=25,
                   batch_size=50,
                   lr_decay=0.95,
                   conv_non_linear="relu",
                   activations=[Iden],
                   sqr_norm_lim=9,
                   non_static=True):
    """
    Train a simple conv net
    img_h = sentence length (padded where necessary)
    img_w = word vector length (300 for word2vec)
    filter_hs = filter window sizes    
    hidden_units = [x, y]: x is the number of feature maps (per filter window) and y is the size of the penultimate layer
    sqr_norm_lim = s^2 in the paper
    lr_decay = adadelta decay parameter
    """
    rng = np.random.RandomState(3435)
    img_h = len(datasets[0][0]) - 1
    filter_w = img_w
    feature_maps = hidden_units[0]
    filter_shapes = []
    pool_sizes = []
    for filter_h in filter_hs:
        filter_shapes.append((feature_maps, 1, filter_h, filter_w))
        pool_sizes.append((img_h - filter_h + 1, img_w - filter_w + 1))
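    # Worked example (illustrative numbers, not from the original): with
    # img_h=64, img_w=300, feature_maps=100 and filter_h=3, filter_shape is
    # (100, 1, 3, 300) and pool_size is (62, 1); pooling thus spans the whole
    # convolution output, reducing each feature map to a single max value
    # (max-over-time pooling).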
    parameters = [("image shape", img_h, img_w),
                  ("filter shape", filter_shapes),
                  ("hidden_units", hidden_units), ("dropout", dropout_rate),
                  ("batch_size", batch_size), ("non_static", non_static),
                  ("learn_decay", lr_decay),
                  ("conv_non_linear", conv_non_linear),
                  ("non_static", non_static), ("sqr_norm_lim", sqr_norm_lim),
                  ("shuffle_batch", shuffle_batch)]
    print parameters

    #define model architecture
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')
    Words = theano.shared(value=U, name="Words")
    zero_vec_tensor = T.vector()
    zero_vec = np.zeros(img_w)
    set_zero = theano.function([zero_vec_tensor],
                               updates=[
                                   (Words,
                                    T.set_subtensor(Words[0, :],
                                                    zero_vec_tensor))
                               ],
                               allow_input_downcast=True)
    layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (x.shape[0], 1, x.shape[1], Words.shape[1]))
    conv_layers = []
    layer1_inputs = []
    for i in xrange(len(filter_hs)):
        filter_shape = filter_shapes[i]
        pool_size = pool_sizes[i]
        conv_layer = LeNetConvPoolLayer(rng,
                                        input=layer0_input,
                                        image_shape=(batch_size, 1, img_h,
                                                     img_w),
                                        filter_shape=filter_shape,
                                        poolsize=pool_size,
                                        non_linear=conv_non_linear)
        layer1_input = conv_layer.output.flatten(2)
        conv_layers.append(conv_layer)
        layer1_inputs.append(layer1_input)
    layer1_input = T.concatenate(layer1_inputs, 1)
    hidden_units[0] = feature_maps * len(filter_hs)
    classifier = MLPDropout(rng,
                            input=layer1_input,
                            layer_sizes=hidden_units,
                            activations=activations,
                            dropout_rates=dropout_rate)

    #define parameters of the model and update functions using adadelta
    params = classifier.params
    for conv_layer in conv_layers:
        params += conv_layer.params
    if non_static:
        #if word vectors are allowed to change, add them as model parameters
        params += [Words]
    cost = classifier.negative_log_likelihood(y)
    dropout_cost = classifier.dropout_negative_log_likelihood(y)
    grad_updates = sgd_updates_adadelta(params, dropout_cost, lr_decay, 1e-6,
                                        sqr_norm_lim)
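    # Note: the adadelta updates are computed from the dropout cost, while the
    # plain (non-dropout) negative log-likelihood is what train_model reports.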

    #shuffle dataset and assign to mini batches. if dataset size is not a multiple of mini batches, replicate
    #extra data (at random)
    np.random.seed(3435)
    if datasets[0].shape[0] % batch_size > 0:
        extra_data_num = batch_size - datasets[0].shape[0] % batch_size
        train_set = np.random.permutation(datasets[0])
        extra_data = train_set[:extra_data_num]
        new_data = np.append(datasets[0], extra_data, axis=0)
    else:
        new_data = datasets[0]
    new_data = np.random.permutation(new_data)
    n_batches = new_data.shape[0] / batch_size
    n_train_batches = int(np.round(n_batches * 0.9))
    #divide train set into train/val sets
    test_set_x = datasets[1][:, :img_h]
    test_set_y = np.asarray(datasets[1][:, -1], "int32")
    train_set = new_data[:n_train_batches * batch_size, :]
    val_set = new_data[n_train_batches * batch_size:, :]
    train_set_x, train_set_y = shared_dataset(
        (train_set[:, :img_h], train_set[:, -1]))
    val_set_x, val_set_y = shared_dataset((val_set[:, :img_h], val_set[:, -1]))
    n_val_batches = n_batches - n_train_batches
    val_model = theano.function(
        [index],
        classifier.errors(y),
        givens={
            x: val_set_x[index * batch_size:(index + 1) * batch_size],
            y: val_set_y[index * batch_size:(index + 1) * batch_size]
        },
        allow_input_downcast=True)

    #compile theano functions to get train/val/test errors
    test_model = theano.function(
        [index],
        classifier.errors(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        },
        allow_input_downcast=True)
    train_model = theano.function(
        [index],
        cost,
        updates=grad_updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        },
        allow_input_downcast=True)
    test_pred_layers = []
    test_size = test_set_x.shape[0]
    test_layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (test_size, 1, img_h, Words.shape[1]))
    for conv_layer in conv_layers:
        test_layer0_output = conv_layer.predict(test_layer0_input, test_size)
        test_pred_layers.append(test_layer0_output.flatten(2))
    test_layer1_input = T.concatenate(test_pred_layers, 1)
    test_y_pred = classifier.predict(test_layer1_input)
    test_error = T.mean(T.neq(test_y_pred, y))
    test_model_all = theano.function([x, y],
                                     test_error,
                                     allow_input_downcast=True)

    #start training over mini-batches
    print '... training'
    epoch = 0
    best_val_perf = 0
    val_perf = 0
    test_perf = 0
    cost_epoch = 0
    while (epoch < n_epochs):
        start_time = time.time()
        epoch = epoch + 1
        if shuffle_batch:
            for minibatch_index in np.random.permutation(
                    range(n_train_batches)):
                cost_epoch = train_model(minibatch_index)
                set_zero(zero_vec)
        else:
            for minibatch_index in xrange(n_train_batches):
                cost_epoch = train_model(minibatch_index)
                set_zero(zero_vec)
        train_losses = [test_model(i) for i in xrange(n_train_batches)]
        train_perf = 1 - np.mean(train_losses)
        val_losses = [val_model(i) for i in xrange(n_val_batches)]
        val_perf = 1 - np.mean(val_losses)
        print(
            'epoch: %i, training time: %.2f secs, train perf: %.2f %%, val perf: %.2f %%'
            % (epoch, time.time() - start_time, train_perf * 100.,
               val_perf * 100.))
        if val_perf >= best_val_perf:
            best_val_perf = val_perf
            test_loss = test_model_all(test_set_x, test_set_y)
            test_perf = 1 - test_loss
    return test_perf