Example no. 1
 def pretraining_functions(self, train_set_x, train_set_y, batch_size):
     index = tensor.lscalar('index')
     corruption_level = tensor.scalar('corruption')
     learning_rate = tensor.scalar('lr')
     switch = tensor.iscalar('switch')
     n_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
     batch_begin = index * batch_size
     batch_end = batch_begin + batch_size
     pretrain_fns = []
     for sugar in self.sugar_layers:
         cost, updates = sugar.get_cost_updates(corruption_level,
                                                learning_rate, switch)
         fn = function(inputs=[
             index,
             Param(corruption_level, default=0.2),
             Param(learning_rate, default=0.1),
             Param(switch, default=1)
         ],
                       outputs=[cost],
                       updates=updates,
                       givens={
                           self.x: train_set_x[batch_begin:batch_end],
                           self.y: train_set_y[batch_begin:batch_end]
                       },
                       on_unused_input='ignore')
         pretrain_fns.append(fn)
     return pretrain_fns
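A minimal usage sketch (not part of the original example) of how the functions returned above are typically driven, with the Param defaults overridden by keyword at call time. The names sda, train_set_x and train_set_y, and all hyper-parameter values, are assumptions for illustration only.

batch_size = 20
pretraining_fns = sda.pretraining_functions(train_set_x, train_set_y, batch_size)
n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

for layer_idx, pretrain in enumerate(pretraining_fns):
    for epoch in xrange(15):
        # 'corruption', 'lr' and 'switch' fall back to their Param defaults
        # (0.2, 0.1, 1) unless overridden by keyword; each call returns a
        # one-element list holding the minibatch cost.
        costs = [pretrain(batch_index, corruption=0.3, lr=0.05)[0]
                 for batch_index in xrange(n_train_batches)]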
Example no. 2
    def greedy_pre_training(self, train_x, batch_size=1, pre_lr=0.25, dropout=True, denoising=False):

        pre_train_fns = []
        index = T.lscalar('index')
        lam = T.scalar('lam')
        beta = T.scalar('beta')
        rho = T.scalar('rho')

        i = 0
        print "\nCompiling functions for DA layers..."
        for sa in self.sa_layers:


            cost, updates = sa.get_cost_and_updates(
                l_rate=pre_lr, lam=lam, beta=beta, rho=rho,
                cost_fn=self.cost_fn_names[0],
                corruption_level=self.corruption_levels[i],
                denoising=denoising)

            # The givens below make self.x (which feeds the first layer's
            # curr_input) a mini-batch rather than the full training set.
            # The inputs of subsequent layers do not need their own givens:
            # if self.x is only a slice, every layer's hidden activations are
            # computed for that same slice, so restricting self.x alone is
            # enough for all later layers to work on the mini-batch.
            sa_fn = function(
                inputs=[index,
                        Param(lam, default=0.25),
                        Param(beta, default=0.25),
                        Param(rho, default=0.25)],
                outputs=cost,
                updates=updates,
                givens={
                    self.x: train_x[index * batch_size:(index + 1) * batch_size]
                })

            pre_train_fns.append(sa_fn)
            i = i+1

        return pre_train_fns
Example no. 3
 def test_examples_7(self):
     from theano import Param
     x, y, w = T.dscalars('x', 'y', 'w')
     z = (x + y) * w
     f = function([x, Param(y, default=1), Param(w, default=2, name='w_by_name')], z)
     assert f(33)                   == array(68.0)
     assert f(33, 2)                == array(70.0)
     assert f(33, 0, 1)             == array(33.0)
     assert f(33, w_by_name=1)      == array(34.0)
     assert f(33, w_by_name=1, y=0) == array(33.0)
    def __init__(self, param_dict):

        self.param_dict = param_dict
        self.training_batch_size = param_dict['training_batch_size']
        nkerns = param_dict['nkerns']
        recept_width = param_dict['recept_width']
        pool_width = param_dict['pool_width']
        stride = param_dict['stride']
        dropout_prob = param_dict['dropout_prob']
        weight_decay = param_dict['l2_reg']
        activation = param_dict['activation']
        weights_variance = param_dict['weights_variance']
        n_channels = param_dict['n_channels']
        n_timesteps = param_dict['n_timesteps']
        n_fbins = param_dict['n_fbins']
        global_pooling = param_dict['global_pooling']
        rng = np.random.RandomState(23455)

        self.training_mode = T.iscalar('training_mode')
        self.x = T.tensor4('x')
        self.y = T.bvector('y')
        self.batch_size = theano.shared(self.training_batch_size)

        self.input = self.x.reshape((self.batch_size, 1, n_channels * n_fbins, n_timesteps))

        self.feature_extractor = FeatureExtractor(rng, self.input, nkerns, recept_width, pool_width, stride,
                                                  self.training_mode,
                                                  dropout_prob[0],
                                                  activation, weights_variance, n_channels, n_timesteps, n_fbins,
                                                  global_pooling)

        self.classifier = SoftmaxLayer(rng=rng, input=self.feature_extractor.output, n_in=nkerns[-1],
                                       training_mode=self.training_mode, dropout_prob=dropout_prob[-1])

        self.weights = self.feature_extractor.weights + self.classifier.weights

        # ---------------------- BACKPROP
        self.cost = self.classifier.cross_entropy_cost(self.y)
        L2_sqr = sum((weight ** 2).sum() for weight in self.weights[::2])
        self.grads = T.grad(self.cost + weight_decay * L2_sqr, self.weights)
        self.updates = self.adadelta_updates(self.grads, self.weights)
        # self.updates = self.nesterov_momentum(self.grads, self.weights)

        # --------------------- FUNCTIONS
        self.train_model = theano.function([self.x, self.y, Param(self.training_mode, default=1)],
                                           outputs=self.cost,
                                           updates=self.updates)

        self.validate_model = theano.function([self.x, self.y, Param(self.training_mode, default=0)],
                                              self.cost)

        self.test_model = theano.function([self.x, Param(self.training_mode, default=0)],
                                          self.classifier.p_y_given_x[:, 1])
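A usage sketch (assumption, not part of the original source): the three functions compiled above differ only in the default of the training_mode Param, so a typical loop never passes it explicitly. The names net, param_dict, n_epochs, training_batches, x_val, y_val and x_test are illustrative only.

net = ConvNet(param_dict)
for epoch in xrange(n_epochs):
    for x_batch, y_batch in training_batches:
        train_cost = net.train_model(x_batch, y_batch)   # training_mode defaults to 1
    val_cost = net.validate_model(x_val, y_val)          # training_mode defaults to 0
test_probs = net.test_model(x_test)                      # P(y=1) for each test example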
def plot_features(subject, data_path, model_path, test_labels, dataset='test'):
    with open(model_path + '/' + subject + '.pickle', 'rb') as f:
        state_dict = cPickle.load(f)
    cnn = ConvNet(state_dict['params'])
    cnn.set_weights(state_dict['weights'])
    scalers = state_dict['scalers']

    if dataset == 'test':
        d = load_test_data(data_path, subject)
        x = d['x']
        y = test_labels['preictal']
    elif dataset == 'train':
        d = load_train_data(data_path, subject)
        x, y = d['x'], d['y']
    else:
        raise ValueError('dataset')

    x, _ = scale_across_time(x, x_test=None, scalers=scalers) if state_dict['params']['scale_time'] \
        else scale_across_features(x, x_test=None, scalers=scalers)

    cnn.batch_size.set_value(x.shape[0])
    get_features = theano.function([cnn.x, Param(cnn.training_mode, default=0)], cnn.feature_extractor.output,
                                 allow_input_downcast=True)

    logits_test = get_features(x)
    model = TSNE(n_components=2, random_state=0)
    z = model.fit_transform(np.float64(logits_test))
    plt.scatter(z[:, 0], z[:, 1], s=60, c=y)
    plt.show()
Example no. 6
    def test_examples_6(self):

        from theano import Param
        x, y = T.dscalars('x', 'y')
        z = x + y
        f = function([x, Param(y, default=1)], z)
        assert f(33) == array(34.0)
        assert f(33, 2) == array(35.0)
Example no. 7
    def __init__(self, num_features, num_classes):
        self.num_features = num_features
        self.num_classes = num_classes

        # Set up Theano network for the model
        # Features: (m, num_features)
        x = T.matrix("x")
        # Class labels (integer indices): (m,)
        y = T.ivector("y")
        # Weights and bias, randomly initialized
        self.theta = theano.shared(value=numpy.zeros(
            num_features * num_classes, dtype=theano.config.floatX),
                                   name='theta',
                                   borrow=True)
        w = self.theta.reshape((num_features, num_classes))
        # Don't include bias: we put 1s in the input instead
        #b = theano.shared(numpy.zeros(num_classes), name="b")
        # Other training params
        self.reg_coef = T.scalar("reg")

        # Construct Theano expression graph
        activation = T.dot(x, w)
        # Softmax activations to get a probability distribution over the classes
        class_probs = nnet.softmax(activation)
        # The predicted class is that with highest activation (no need to do the softmax for this)
        prediction = T.argmax(activation, axis=1)
        # Cross-entropy loss function
        #xent = nnet.categorical_crossentropy(class_probs, y)
        xent = -T.mean(T.log(class_probs)[T.arange(y.shape[0]), y])
        # The cost to minimize, including L2 regularization
        cost = xent + self.reg_coef * (w[1:, :]**2).sum()
        # Compute the gradient of the cost
        self.gw = T.grad(cost, w)
        self.gtheta = T.grad(cost, self.theta)
        # Error in terms of hard predictions (accuracy)
        error = T.mean(T.neq(prediction, y))

        # Compile
        self._predict_fn = theano.function(inputs=[x], outputs=prediction)
        self._prob_fn = theano.function(inputs=[x], outputs=class_probs)
        self._cost_fn = theano.function(
            inputs=[x, y],
            outputs=xent,
        )
        self._cost_fn_reg = theano.function(
            inputs=[x, y, Param(self.reg_coef, default=0.01)],
            outputs=cost,
        )
        self._error_fn = theano.function(inputs=[x, y], outputs=error)

        self.w = w
        self.x = x
        self.y = y
        self._cost_without_reg = xent
    def fine_tuning(self, datasets, batch_size=1, fine_lr=0.2):
        (train_set_x, train_set_y) = datasets[0]
        (valid_set_x, valid_set_y) = datasets[1]
        (test_set_x, test_set_y) = datasets[2]

        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
        n_valid_batches /= batch_size

        index = T.lscalar('index')  # index to a [mini]batch

        gparams = T.grad(self.fine_cost, self.thetas)

        updates = [(param, param - gparam * fine_lr)
                   for param, gparam in zip(self.thetas, gparams)]

        fine_tune_fn = function(
            inputs=[index, Param(self.lam_fine_tune, default=0.25)],
            outputs=self.fine_cost,
            updates=updates,
            givens={
                self.x:
                train_set_x[index * self.batch_size:(index + 1) *
                            self.batch_size],
                self.y:
                train_set_y[index * self.batch_size:(index + 1) *
                            self.batch_size]
            })

        validation_fn = function(
            inputs=[index],
            outputs=self.error,
            givens={
                self.x:
                valid_set_x[index * batch_size:(index + 1) * batch_size],
                self.y:
                valid_set_y[index * batch_size:(index + 1) * batch_size]
            },
            name='valid')

        def valid_score():
            return [validation_fn(i) for i in xrange(n_valid_batches)]

        return fine_tune_fn, valid_score
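A usage sketch (assumption, not from the original source) of the two functions returned by fine_tuning(). The names sae and datasets and the hyper-parameter values are illustrative only.

fine_tune_fn, valid_score = sae.fine_tuning(datasets, batch_size=10, fine_lr=0.2)
n_train_batches = datasets[0][0].get_value(borrow=True).shape[0] // 10

for epoch in xrange(30):
    for minibatch_index in xrange(n_train_batches):
        # The second positional argument overrides the lam_fine_tune Param
        # default of 0.25.
        fine_tune_fn(minibatch_index, 0.1)
    errors = valid_score()
    print 'epoch', epoch, 'mean validation error', sum(errors) / len(errors)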
Example no. 9
def test():
    # multiple inputs, multiple outputs
    a, b = T.dmatrices('a', 'b')
    diff = a - b
    abs_diff = T.abs_(diff)
    sqr_diff = diff ** 2
    f = function([a, b], [diff, abs_diff, sqr_diff])
    h, i, j = f([[0, 1], [2, 3]], [[4, 5], [6, 7]])

    # default value for function arguments
    a, b = T.dscalars('a', 'b')
    z = a + b
    f = function([a, Param(b, default=1)], z)
    print f(1, b=2)
    print f(1)
    print f(1, 2)

    # shared variable
    state = shared(0)
    inc = T.lscalar('inc') # state is int64 by default
    accumulator = function([inc], state, updates=[(state, state + inc)])
    print accumulator(300)
    print state.get_value()
    def __init__(self, network, regularize_bias=False, optimization="sgd"):
        """
        optimization selects the type of optimization algorithm used. The default, 'sgd', is
        standard stochastic gradient descent. Currently, the only alternative is 'adadelta',
        which implements AdaDelta updates.

        """
        self.network = network
        x, y = network.x, network.y

        # Training params
        self.learning_rate = T.scalar("learning_rate")
        self.reg_coef = T.scalar("reg_coef")
        self.class_weights = T.vector("class_weights", dtype="float64")
        # Needed for AdaDelta
        self.decay = T.scalar("decay")

        self.optimization = optimization

        # Cross-entropy loss function
        log_probs = T.log(network.class_probs[T.arange(y.shape[0]), y])
        xent = -T.mean(log_probs * self.class_weights[y])
        #xent = -self.network._mean_per_class_target_log_prob
        # The cost to minimize, including L2 regularization
        cost = xent + self.reg_coef * ((network.w0**2.).mean() +
                                       (network.w1**2.).mean())
        if regularize_bias:
            cost += self.reg_coef * ((network.b0**2.).mean() +
                                     (network.b1**2.).mean())

        parameters = [network.w0, network.w1, network.b0, network.b1]
        # Compute the gradient of the cost wrt the parameters
        gradients = [T.grad(cost, param) for param in parameters]

        if optimization == "adadelta":
            # AdaDelta updates, based on Shawn Tan's implementation:
            #   https://blog.wtf.sg/2014/08/28/implementing-adadelta/
            extra_params = [
                Param(self.learning_rate, default=1e-6),
                Param(self.decay, default=0.95)
            ]

            # Store intermediate updates
            gradients_sq = [
                theano.shared(numpy.zeros(p.get_value().shape))
                for p in parameters
            ]
            deltas_sq = [
                theano.shared(numpy.zeros(p.get_value().shape))
                for p in parameters
            ]

            # Calculates the new "average" delta for the next iteration
            gradients_sq_new = [
                self.decay * g_sq + (1 - self.decay) * (g**2)
                for g_sq, g in izip(gradients_sq, gradients)
            ]

            # Calculates the step in direction
            # The square root is an approximation to getting the RMS for the average value
            deltas = [(T.sqrt(d_sq + self.learning_rate) /
                       T.sqrt(g_sq + self.learning_rate)) * grad
                      for d_sq, g_sq, grad in izip(deltas_sq, gradients_sq_new,
                                                   gradients)]

            # calculates the new "average" deltas for the next step.
            deltas_sq_new = [
                self.decay * d_sq + (1 - self.decay) * (d**2)
                for d_sq, d in izip(deltas_sq, deltas)
            ]

            # Prepare the updates list
            updates = (
                # Update the squared gradients
                zip(gradients_sq, gradients_sq_new) +
                # Update the squared deltas
                zip(deltas_sq, deltas_sq_new) +
                # Update the model's actual parameters
                [(param, param - delta)
                 for (param, delta) in izip(parameters, deltas)])
        else:
            # Standard SGD updates
            extra_params = [Param(self.learning_rate, default=0.1)]
            updates = [(param, param - self.learning_rate * grad)
                       for (param, grad) in zip(parameters, gradients)]

        # Compile
        self._train_fn = theano.function(
            inputs=[
                x,
                y,
                Param(self.reg_coef, default=0.01),
                Param(self.class_weights,
                      default=numpy.ones(network.num_classes,
                                         dtype=numpy.float64)),
            ] + extra_params,
            outputs=T.sum(log_probs),
            updates=updates,
            givens=[(network.output_bias, 1),
                    (network.hidden_bias, 1)],  # Bias enabled for training
            #on_unused_input="warn",
        )
        self._cost_fn = theano.function(
            inputs=[
                x, y,
                Param(self.class_weights,
                      default=numpy.ones(network.num_classes,
                                         dtype=numpy.float64)),
                Param(self.reg_coef, default=0.01)
            ],
            outputs=cost,
            givens=[(network.output_bias, 1), (network.hidden_bias, 1)],
            mode='FAST_RUN',
            #on_unused_input="warn",
        )
        self._costs = theano.function(
            inputs=[
                x, y,
                Param(self.class_weights,
                      default=numpy.ones(network.num_classes,
                                         dtype=numpy.float64))
            ],
            outputs=xent,
            givens=[(network.output_bias, 1), (network.hidden_bias, 1)],
            #on_unused_input="warn",
        )
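A usage sketch (assumption, not part of the original source) of the compiled training function. The class name Trainer and the variables network, train_x and train_y are illustrative; the keyword names match the T.scalar/T.vector names declared above.

trainer = Trainer(network, optimization="adadelta")
for epoch in xrange(10):
    # reg_coef, class_weights, learning_rate and decay all carry Param defaults,
    # so only the values being overridden need to be passed by keyword.
    total_log_prob = trainer._train_fn(train_x, train_y, reg_coef=0.05, decay=0.9)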
Example no. 11
    def validate(self, train_set, valid_set, init_learning_rate, max_iters,
                 validation_frequency, improvement_threshold):

        train_set_iterator = TrainSetIterator(train_set,
                                              self.training_batch_size)
        n_batches = train_set_iterator.get_number_of_batches()
        print 'training set \nshape:', train_set[
            1].shape, 'number of seizures:', np.sum(
                train_set[1]), 'number of batches:', n_batches

        valid_set_x, valid_set_y = valid_set
        valid_size = valid_set_x.shape[0]
        print 'validation set \nshape:', valid_size, 'number of seizures:', np.sum(
            valid_set[1])

        learning_rate = theano.shared(np.float32(init_learning_rate))
        learning_rate_decay = np.float32(init_learning_rate / max_iters)

        cost = self.layer3.negative_log_likelihood(self.y)
        grads = T.grad(cost, self.params)

        #self._check_num_gradient(train_set_iterator.next())
        #updates = self._momentum_updates(grads, learning_rate)
        #updates = self._rmsprop_updates(grads,learning_rate)

        updates = self._vanilla_updates(grads, learning_rate)

        #-------------------- FUNCTIONS
        tp, tn = self.layer3.tptn(self.y)
        fp, fn = self.layer3.fpfn(self.y)

        train_model = theano.function(
            [self.x, self.y,
             Param(self.training_mode, default=1)],
            [cost, self.layer3.p_y_given_x, self.layer2.output],
            updates=updates,
            on_unused_input='ignore')
        validate_model = theano.function(
            [self.x, self.y,
             Param(self.training_mode, default=0)], [cost, tp, tn, fp, fn],
            on_unused_input='ignore')
        #------------------------------  TRAINING
        iter = 0
        epoch = 0
        best_cost = np.inf
        best_iter = 0
        patience_increase = 2
        patience = 150 * validation_frequency  #50
        done_looping = False
        start_time = time.clock()
        while not done_looping:
            epoch += 1
            for x, y in train_set_iterator:
                iter += 1
                train_model(x, y)
                learning_rate.set_value(
                    max(learning_rate.get_value() - learning_rate_decay, 0.0))
                # ------------------------ VALIDATION
                if iter % validation_frequency == 0:
                    self.batch_size.set_value(valid_size)
                    [valid_cost, tp, tn, fp,
                     fn] = validate_model(valid_set_x, valid_set_y)
                    print epoch, iter, tp, tn, fp, fn, valid_cost, learning_rate.get_value()

                    self.batch_size.set_value(self.training_batch_size)

                    if valid_cost < best_cost:
                        if valid_cost < best_cost * improvement_threshold:
                            patience = max(patience, iter * patience_increase)
                        best_iter = iter
                        best_cost = valid_cost

                    if iter >= max_iters or patience <= iter:
                        done_looping = True
                        break

        print 'time:', (time.clock() - start_time) / 60.
        print 'best_iter:', best_iter
        return best_iter
Example no. 12
    def test(self, train_set, test_set, init_learning_rate,
             learning_rate_decay, opt_iters, out_file):
        train_set_iterator = TrainSetIterator(train_set,
                                              self.training_batch_size)
        n_batches = train_set_iterator.get_number_of_batches()
        print 'training set \nshape:', train_set[
            1].shape, 'number of seizures:', np.sum(
                train_set[1]), 'number of batches:', n_batches

        test_set_x, test_set_y = test_set
        test_size = test_set_x.shape[0]
        print 'test set \nshape:', test_size, 'number of seizures:', np.sum(
            test_set[1])

        learning_rate = theano.shared(np.float32(init_learning_rate))
        learning_rate_decay = np.float32(learning_rate_decay)

        cost = self.layer3.negative_log_likelihood(self.y)
        grads = T.grad(cost, self.params)
        updates = self._vanilla_updates(grads, learning_rate)

        #----------- FUNCTIONS
        tp, tn = self.layer3.tptn(self.y)
        fp, fn = self.layer3.fpfn(self.y)
        tp_idx = self.layer3.tp_idx(self.y)
        fp_idx = self.layer3.fp_idx(self.y)

        train_model = theano.function(
            [self.x, self.y,
             Param(self.training_mode, default=1)],
            cost,
            updates=updates,
            on_unused_input='ignore')
        test_model = theano.function(
            [self.x, self.y,
             Param(self.training_mode, default=0)],
            [tp_idx, fp_idx, tp, tn, fp, fn],
            on_unused_input='ignore')

        iter = 0
        done_looping = False
        #------------------------------  TRAINING
        while not done_looping:
            for x, y in train_set_iterator:
                iter += 1
                train_model(x, y)
                learning_rate.set_value(
                    max(learning_rate.get_value() - learning_rate_decay, 0.0))
                if iter > opt_iters:
                    done_looping = True
                    break
                    #------------------------------  TESTING
        self.batch_size.set_value(test_size)
        [tp_idx, fp_idx, tp, tn, fp, fn] = test_model(test_set_x, test_set_y)
        seizure_idx = np.flatnonzero(test_set_y)
        det_dict = detections_and_delay(tp_idx, fp_idx, seizure_idx)
        print '-- TEST --'
        print 'tp:', tp, 'tn:', tn, 'fp:', fp, 'fn', fn
        print 'fp indices:', fp_idx, 'tp indices:', tp_idx
        print 'seizure indices:', seizure_idx
        print det_dict
        json.dump(det_dict, out_file)
        out_file.write('\n')
Example no. 13
#!/usr/bin/env python
import numpy
import theano
from theano import tensor as T
from theano import Param
from theano import function
from theano import shared
rng = numpy.random
__author__ = 'yanziang'

# default value for parameter of function
x, y = T.dscalars('x', 'y')
z = x + y
f = function([x, Param(y, default=1.)], z)
print f(33.0)
print f(33.0, 2.0)

# logistic regression
print 'logistic regression'
N = 400
feats = 784
D = (rng.randn(N, feats), rng.randint(size=N, low=0, high=2))
training_steps = 10000

# Declare Theano symbolic variables
x = T.matrix("x")
y = T.vector("y")
w = theano.shared(rng.randn(feats), name="w")
b = theano.shared(0., name="b")
print("Initial model:")
print(w.get_value())
Example no. 14
    def __init__(self, network, optimization="sgd", loss="xent", input_var=None, extra_update_params=[],
                 extra_reg_params=[]):
        """
        optimization selects the type of optimization algorithm used. The default, 'sgd', is
        standard stochastic gradient descent. Currently, the only alternative is 'adadelta',
        which implements AdaDelta updates.

        loss is "xent" or "l2".

        extra_update_params allows you to specify other parameters that should be updated during
        training. They must, of course, feature in the expression that computes the cost function.

        Likewise, extra_reg_params allows you to include extra parameters in the L2 regularization
        term. They should each be a 1D vector.

        """
        self.network = network
        if input_var is not None:
            x = input_var
        else:
            # Take the network's input as input to the training functions
            x = network.x
        # Create a target variable, of the same rank and type as the hidden layer
        # Special case for where the last layer has just a single unit: don't want y to need to be (M,1), just a vector
        if self.network.layer_sizes[-1] == 1:
            y = T.tensor(network.hidden_layer.dtype, (False,), name="y")
            # For computing the cost, add an extra dimension so the result is (M,1), not (M,)
            label_for_cost = y.dimshuffle(0, "x")
        else:
            y = label_for_cost = T.tensor(network.hidden_layer.dtype, network.hidden_layer.broadcastable, name="y")

        # Training params
        self.learning_rate = T.scalar("learning_rate")
        self.reg_coef = T.scalar("reg_coef")
        # Needed for AdaDelta
        self.decay = T.scalar("decay")
        self.optimization = optimization

        # Build cost function
        reg = self.network.get_l2_regularization(extra_params=extra_update_params)
        cost = self.network.get_cost(label_for_cost, loss=loss)
        cost_with_reg = cost + self.reg_coef * reg

        parameters = network.params + extra_update_params
        # Compute the gradient of the cost wrt the parameters
        gradients = [T.grad(cost_with_reg, param) for param in parameters]

        if optimization == "adadelta":
            # AdaDelta updates, based on Shawn Tan's implementation:
            #   https://blog.wtf.sg/2014/08/28/implementing-adadelta/
            extra_params = [Param(self.learning_rate, default=1e-6),
                            Param(self.decay, default=0.95)]

            # Store intermediate updates
            gradients_sq = [theano.shared(numpy.zeros(p.get_value().shape)) for p in parameters]
            deltas_sq = [theano.shared(numpy.zeros(p.get_value().shape)) for p in parameters]

            # Calculates the new "average" delta for the next iteration
            gradients_sq_new = [self.decay*g_sq + (1-self.decay)*(g**2) for g_sq, g in izip(gradients_sq, gradients)]

            # Calculates the step in direction
            # The square root is an approximation to getting the RMS for the average value
            deltas = [(T.sqrt(d_sq+self.learning_rate)/T.sqrt(g_sq+self.learning_rate))*grad
                      for d_sq, g_sq, grad in izip(deltas_sq, gradients_sq_new, gradients)]

            # calculates the new "average" deltas for the next step.
            deltas_sq_new = [self.decay*d_sq + (1-self.decay)*(d**2) for d_sq, d in izip(deltas_sq, deltas)]

            # Prepare the updates list
            updates = (
                # Update the squared gradients
                zip(gradients_sq, gradients_sq_new) +
                # Update the squared deltas
                zip(deltas_sq,deltas_sq_new) +
                # Update the model's actual parameters
                [(param, param - delta) for (param, delta) in izip(parameters, deltas)]
            )
        else:
            # Standard SGD updates
            extra_params = [Param(self.learning_rate, default=0.1)]
            updates = [(param, param - self.learning_rate * grad) for (param, grad) in zip(parameters, gradients)]

        # Compile
        self._train_fn = theano.function(
            inputs=[
                x, y,
                Param(self.reg_coef, default=0.01),
            ] + extra_params,
            outputs=T.mean(cost),
            updates=updates,
        )
        self._cost_fn = theano.function(
            inputs=[
                x, y,
            ],
            outputs=cost,
        )
Example no. 15
import theano.tensor as T
from theano import function
from theano import Param

x, y = T.scalars('x', 'y')
z = x + y
f = function([x, Param(y, default=1)], z)

print f(33)
print f(33, 2)

w = T.scalar('w')
z_two = (x + y) * w
f_two = function(
    [x, Param(y, default=1),
     Param(w, default=2, name='w_by_name')], z_two)

print("Second function")
print f_two(33)
print f_two(33, 2)
print f_two(33, 0, 1)
print f_two(33, w_by_name=1)
print f_two(33, w_by_name=1, y=0)
Example no. 16
	def __init__(self, batch_size= 8):
		self.batch_size = batch_size
		self.num_updates = 0
		layers = []
		X = T.matrix().reshape((batch_size,1,225,225))

		Y = T.ivector()

		is_train = T.scalar()

		inputs = InputLayer(X,name="input")
		layers.append(inputs)

		convlayer0 = ConvLayer(inputs,(64,1,15,15), subsample=(3,3) ,name="conv 0")
		relu0 = RELU(convlayer0, name="relu 0")
		pool0 = PoolLayer(relu0, pool_size=(3,3), stride=(2,2), name="pool 0")

		layers += [convlayer0,relu0,pool0]

		convlayer1 = ConvLayer(pool0,(128,64,5,5), name="conv 1")
		relu1 = RELU(convlayer1, name="relu 1")
		pool1 = PoolLayer(relu1, pool_size=(3,3), stride=(2,2), name="pool 1")

		layers += [convlayer1,relu1,pool1]

		convlayer2 = ConvLayer(pool1,(256,128,3,3),border_mode=(1,1), name="conv 2")
		relu2 = RELU(convlayer2, name="relu 2")

		layers += [convlayer2,relu2]

		convlayer3 = ConvLayer(relu2,(256,256,3,3),border_mode=(1,1) , name="conv 3")
		relu3 = RELU(convlayer3, name="relu 3")

		layers += [convlayer3,relu3]

		convlayer4 = ConvLayer(relu3,(256,256,3,3),border_mode=(1,1), name="conv 4")
		relu4 = RELU(convlayer4, name="relu 4")
		pool2 = PoolLayer(relu4, pool_size=(3,3), stride=(2,2), name="pool 2")


		drop0 = DropoutLayer(pool2, is_train, p = 0.5, name="Drop 0, p = 0.5")
		layers += [convlayer4,relu4,pool2,drop0]

		convlayer5 = ConvLayer(drop0,(512,256,7,7),name="conv 5")
		relu5 = RELU(convlayer5,name="relu 5")

		squeezed = Squeeze(relu5,outdim=2, name="squeeze 1")


		drop1 = DropoutLayer(squeezed, is_train, p = 0.5, name="Drop 1, p = 0.5")

		layers += [convlayer5, relu5, squeezed, drop1]
		# layers += [convlayer5, relu5, squeezed]

		fullyconn1 = FCLayer(drop1,512,512,name="FC 1")
		# fullyconn1 = FCLayer(squeezed,512,512,name="FC 1")

		relu6 = RELU(fullyconn1,name="relu 6")

		drop2 = DropoutLayer(relu6, is_train, p = 0.5, name="Drop 2, p = 0.5")


		#fullyconn2 = FCLayer(fullyconn1,512,250,name="FC 2")
		fullyconn2 = FCLayer(drop2,512,250,name="FC 2")
		
		softmax1 = SoftmaxLayer(fullyconn2, name="softmax")


		# layers += [fullyconn1,fullyconn2,softmax1]
		layers += [fullyconn1,drop2, relu6, fullyconn2,softmax1]
		
		predicted_class = T.argmax(softmax1.output(), axis=1)

		cost = CategoricalCrossEntropy(softmax1,Y).output()


		self.layers = layers

		params = get_params(self.layers)
		biases = get_biases(self.layers)

		caches_params = make_caches(params)
		caches_bias = make_caches(biases)
		eta = T.scalar()

		updates = momentum(cost, params, biases, caches_params,caches_bias, eta)
		
		self.train = theano.function([X,Y,eta, Param(is_train,1)],[cost,predicted_class],updates=updates,allow_input_downcast=True,on_unused_input='warn')

		self.predict = theano.function([X,Param(is_train,0)],predicted_class,allow_input_downcast=True,on_unused_input='warn')

		self.predict_with_drop = theano.function([X,Param(is_train,1)],predicted_class,allow_input_downcast=True, on_unused_input='warn')	
		
		self.predict_prob = theano.function([X,Param(is_train,0)],softmax1.output(), allow_input_downcast=True,on_unused_input='warn')
		
		self.validate = theano.function([X,Y,Param(is_train,0)],cost,allow_input_downcast=True, on_unused_input='warn')

		self.get_embeddings = theano.function([X,Param(is_train,0)],fullyconn1.output(), allow_input_downcast=True, on_unused_input='warn')
Example no. 17
    
    outputs:    the list of output expressions, given as a list or a dict. If a dict, its keys must be strings.

    updates:    an iterable of (shared_variable, new_expression) pairs. Each shared_variable is updated with the
                value of its new_expression; the pairs may be given as a list, a tuple or an ordered dict.
                The updates are applied on every call of the function, so each shared_variable is refreshed from
                its new_expression once per call.

    givens:     an iterable of substitutions, given as a list, a tuple or a dict. On every call of the function
                the given values are substituted into the graph; like inputs, they feed the expressions that
                produce outputs.
                
"""
# Default parameters

x,y=T.dscalars('x','y')
z=x+y
f1=function([x, Param(y,default=1,name='by_name')],z)
print(f1(33))
print(f1(33,2))
print(f1(33,by_name=3))

# Shared variables
'''
    If the GPU had to fetch these values from host memory every time it encountered them, a large amount
    of time would be spent on data transfer, making the GPU code run very slowly, sometimes even slower
    than using the CPU alone; shared variables avoid this.

    Shared variables must be of type floatX (so they can live on the GPU).

    A shared variable acts as state that a compiled function can access; its value is read and written
    with the get_value and set_value methods.

    A shared variable can be used both as a symbolic variable and as shared storage.

'''
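A small self-contained sketch (not part of the original source) illustrating the updates and givens arguments described above, together with a shared variable; float64 is used here for simplicity, whereas on a GPU the values would be floatX/float32 as noted.

import numpy as np
import theano
import theano.tensor as T
from theano import function

state = theano.shared(0.0, name='state')                          # shared storage
data = theano.shared(np.arange(4, dtype='float64'), name='data')  # shared data vector
inc = T.dscalar('inc')
i = T.lscalar('i')
v = T.dscalar('v')

# Each call returns state + data[i]: givens substitutes data[i] for v,
# and updates then adds inc to state.
acc = function([inc, i], state + v,
               updates=[(state, state + inc)],
               givens={v: data[i]})

print(acc(1.0, 2))        # 2.0  (old state 0.0 + data[2])
print(state.get_value())  # 1.0  (state was updated by the call above)
state.set_value(0.0)      # reset the shared state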
Example no. 18
    def train(self,
              batch_iterator,
              iterations=10000,
              iteration_callback=None,
              validation_set=None,
              stopping_iterations=10,
              log=None,
              cost_plot_filename=None,
              training_cost_prop_change_threshold=0.0005,
              learning_rate=0.1,
              regularization=0.,
              class_weights_vector=None,
              corruption_level=0.,
              continuous_corruption=False,
              loss="xent"):
        """
        Train on data stored in Theano tensors. Uses minibatch training.

        batch_iterator should be a repeatable iterator producing batches.

        iteration_callback is called after each iteration with args (iteration, error array).

        If a validation set (matrix) is given, it is used to compute an error after each iteration
        and to enforce a stopping criterion. The algorithm will terminate if it goes stopping_iterations
        iterations without an improvement in validation error.

        If compute_error_frequency > 1 (default=5), that many iterations are performed between successive
        computations of the error on the training set.

        The algorithm will assume it has converged and stop early if the proportional change between successive
        training costs drops below training_cost_prop_change_threshold for five iterations in a row.

        Uses L2 regularization.

        """
        if log is None:
            log = get_console_logger("Autoencoder train")

        log.info(
            "Training params: learning rate=%s, noise ratio=%.1f%% (%s), regularization=%s"
            % (learning_rate, corruption_level * 100.0, "continuous corruption"
               if continuous_corruption else "zeroing corruption",
               regularization))
        log.info("Training with SGD")

        ######## Compile functions
        # Prepare cost/update functions for training
        cost, updates = self.network.get_cost_updates(
            self.learning_rate,
            self.regularization,
            class_cost_weights=class_weights_vector,
            corruption_level=corruption_level,
            continuous_corruption=continuous_corruption,
            loss=loss)
        # Prepare training functions
        cost_fn = theano.function(
            inputs=[self.network.x,
                    Param(self.regularization, default=0.0)],
            outputs=cost,
        )
        train_fn = theano.function(
            inputs=[
                self.network.x,
                Param(self.learning_rate, default=0.1),
                Param(self.regularization, default=0.0)
            ],
            outputs=cost,
            updates=updates,
        )
        # Prepare a function to test how close to the identity function the learned mapping is
        # A lower value indicates that it's generalizing more (though not necessarily better)
        identity_ratio = T.mean(
            T.sum(self.network.get_prediction_dist() * (self.network.x > 0),
                  axis=1))
        identity_ratio_fn = theano.function(inputs=[self.network.x],
                                            outputs=identity_ratio)
        ###########

        # Keep a record of costs, so we can plot them
        val_costs = []
        training_costs = []

        # Keep a copy of the best weights so far
        val_cost = 0.
        best_weights = best_iter = best_val_cost = None
        if validation_set is not None:
            best_weights = self.network.get_weights()
            best_iter = -1
            best_val_cost = cost_fn(validation_set)

            log.info("Computing initial validation scores")
            f_score, precision, recall, f_score_classes = self.compute_f_scores(
                validation_set)
            log.info(
                "F-score: %.4f%% (mean over %d classes), P=%.4f%%, R=%.4f%%" %
                (f_score * 100.0, f_score_classes, precision * 100.0,
                 recall * 100.0))
            identity_ratio = identity_ratio_fn(validation_set)
            log.info("Identity ratio = %.4g" % identity_ratio)

        below_threshold_its = 0

        for i in range(iterations):
            err = 0.0
            batch_num = 0
            for batch_num, batch in enumerate(batch_iterator):
                # Shuffle the training data between iterations, as one should with SGD
                # Just shuffle within batches
                shuffle = numpy.random.permutation(batch.shape[0])
                batch[:] = batch[shuffle]

                # Update the model with this batch's data
                err += train_fn(batch,
                                learning_rate=learning_rate,
                                regularization=regularization)

            training_costs.append(err / batch_num)

            if validation_set is not None:
                # Compute the cost function on the validation set
                val_cost = cost_fn(validation_set) / validation_set.shape[0]
                val_costs.append(val_cost)
                if val_cost <= best_val_cost:
                    # We assume that, if the validation error remains the same, it's better to use the new set of
                    # weights (with, presumably, a better training error)
                    if val_cost == best_val_cost:
                        log.info(
                            "Same validation cost: %.4f, using new weights" %
                            val_cost)
                    else:
                        log.info("New best validation cost: %.4f" % val_cost)
                    # Update our best estimate
                    best_weights = self.network.get_weights()
                    best_iter = i
                    best_val_cost = val_cost
                if val_cost >= best_val_cost and i - best_iter >= stopping_iterations:
                    # We've gone on long enough without improving validation error
                    # Time to call a halt and use the best validation error we got
                    log.info(
                        "Stopping after %d iterations of increasing validation cost"
                        % stopping_iterations)
                    break

            log.info(
                "COMPLETED ITERATION %d: training cost=%.5g, val cost=%.5g" %
                (i, training_costs[-1], val_cost))

            if cost_plot_filename:
                # Plot the cost function as we train
                # Skip the first costs, as they're usually so much higher than others that the rest is indistinguishable
                columns = [(training_costs[1:], "Train cost")]
                if validation_set is not None:
                    columns.append((val_costs[1:], "Val cost"))
                ax = plot_costs(None, *columns)
                # Add a line at the most recent best val cost
                ax.axvline(float(best_iter), color="b")
                ax.text(float(best_iter + 1) + 0.1,
                        best_val_cost * 1.1,
                        "Best val cost",
                        color="b")
                from matplotlib import pyplot as plt
                plt.savefig(cost_plot_filename)

            if validation_set is not None:
                f_score, precision, recall, f_score_classes = self.compute_f_scores(
                    validation_set)
                log.info(
                    "Validation f-score: %.4f%% (mean over %d classes), P=%.4f%%, R=%.4f%%"
                    % (f_score * 100.0, f_score_classes, precision * 100.0,
                       recall * 100.0))
                identity_ratio = identity_ratio_fn(validation_set)
                log.info("Validation identity ratio = %.4g" % identity_ratio)

            if iteration_callback is not None:
                # Not computing training error at the moment
                iteration_callback(i, training_costs[-1], val_cost, 0.0,
                                   best_iter)

            # Check the proportional change between this iteration's training cost and the last
            if len(training_costs) > 2:
                training_cost_prop_change = abs(
                    (training_costs[-2] - training_costs[-1]) /
                    training_costs[-2])
                if training_cost_prop_change < training_cost_prop_change_threshold:
                    # Very small change in training cost - maybe we've converged
                    below_threshold_its += 1
                    if below_threshold_its >= 5:
                        # We've had enough iterations with very small changes: we've converged
                        log.info(
                            "Proportional change in training cost (%g) below %g for five successive iterations: "
                            "converged" %
                            (training_cost_prop_change,
                             training_cost_prop_change_threshold))
                        break
                    else:
                        log.info(
                            "Proportional change in training cost (%g) below %g for %d successive iterations: "
                            "waiting until it's been low for five iterations" %
                            (training_cost_prop_change,
                             training_cost_prop_change_threshold,
                             below_threshold_its))
                else:
                    # Reset the below threshold counter
                    below_threshold_its = 0

        if best_weights is not None:
            # Use the weights that gave us the best error on the validation set
            self.network.set_weights(best_weights)
Example no. 19
    def train(self,
              batch_iterator,
              iterations=10000,
              iteration_callback=None,
              validation_set=None,
              stopping_iterations=10,
              log=None,
              cost_plot_filename=None,
              training_cost_prop_change_threshold=0.0005,
              learning_rate=0.1,
              regularization=0.,
              class_weights_vector=None,
              corruption_level=0.,
              continuous_corruption=False,
              loss="xent"):
        """
        See autoencoder trainer: uses the same training for each layer in turn, then rolls out and
        trains the whole thing together.

        """
        if log is None:
            log = get_console_logger("Autoencoder train")

        # Because the layers are all already properly stacked, when we get the cost/updates for a layer,
        # it's already a function of the original input, but only updates the layer itself
        for layer_num, layer in enumerate(self.network.layers):
            log.info("TRAINING LAYER %d" % layer_num)
            ## Compile functions
            # Prepare cost/update functions for training
            cost, updates = layer.get_cost_updates(
                self.learning_rate,
                self.regularization,
                class_cost_weights=class_weights_vector,
                corruption_level=corruption_level,
                continuous_corruption=continuous_corruption,
                loss=loss)
            # Prepare training functions
            # Note that these use the initial input, not the layer input
            cost_fn = theano.function(
                inputs=[self.input,
                        Param(self.regularization, default=0.0)],
                outputs=cost,
            )
            train_fn = theano.function(
                inputs=[
                    self.input,
                    Param(self.learning_rate, default=0.1),
                    Param(self.regularization, default=0.0)
                ],
                outputs=cost,
                updates=updates,
            )
            # Prepare a function to test how close to the identity function the learned mapping is
            # A lower value indicates that it's generalizing more (though not necessarily better)
            identity_ratio = T.mean(
                T.sum(layer.get_prediction_dist() * (layer.x > 0), axis=1))
            identity_ratio_fn = theano.function(inputs=[self.input],
                                                outputs=identity_ratio)

            # Keep a record of costs, so we can plot them
            val_costs = []
            training_costs = []

            # Keep a copy of the best weights so far
            val_cost = 0.
            best_weights = best_iter = best_val_cost = None
            if validation_set is not None:
                best_weights = layer.get_weights()
                best_iter = -1
                best_val_cost = cost_fn(validation_set)

                log.info("Computing initial validation scores")
                identity_ratio = identity_ratio_fn(validation_set)
                log.info("Identity ratio = %.4g" % identity_ratio)

            log.info("Computing initial training cost")
            batch_costs = [cost_fn(batch) for batch in batch_iterator]
            initial_cost = sum(batch_costs) / len(batch_costs)
            log.info("Cost = %g (%d batches)" %
                     (initial_cost, len(batch_costs)))

            below_threshold_its = 0

            for i in range(iterations):
                err = 0.0
                batch_num = 0
                for batch_num, batch in enumerate(batch_iterator):
                    # Shuffle the training data between iterations, as one should with SGD
                    # Just shuffle within batches
                    shuffle = numpy.random.permutation(batch.shape[0])
                    batch[:] = batch[shuffle]

                    # Update the model with this batch's data
                    err += train_fn(batch,
                                    learning_rate=learning_rate,
                                    regularization=regularization)

                training_costs.append(err / batch_num)

                if validation_set is not None:
                    # Compute the cost function on the validation set
                    val_cost = cost_fn(
                        validation_set) / validation_set.shape[0]
                    val_costs.append(val_cost)
                    if val_cost <= best_val_cost:
                        # We assume that, if the validation error remains the same, it's better to use the new set of
                        # weights (with, presumably, a better training error)
                        if val_cost == best_val_cost:
                            log.info(
                                "Same validation cost: %.4f, using new weights"
                                % val_cost)
                        else:
                            log.info("New best validation cost: %.4f" %
                                     val_cost)
                        # Update our best estimate
                        best_weights = layer.get_weights()
                        best_iter = i
                        best_val_cost = val_cost
                    if val_cost >= best_val_cost and i - best_iter >= stopping_iterations:
                        # We've gone on long enough without improving validation error
                        # Time to call a halt and use the best validation error we got
                        log.info(
                            "Stopping after %d iterations of increasing validation cost"
                            % stopping_iterations)
                        break

                    log.info(
                        "COMPLETED ITERATION %d: training cost=%.5g, val cost=%.5g"
                        % (i, training_costs[-1], val_cost))
                else:
                    log.info("COMPLETED ITERATION %d: training cost=%.5g" %
                             (i, training_costs[-1]))

                if cost_plot_filename:
                    # Plot the cost function as we train
                    # Skip the first costs, as they're usually so much higher that the rest is indistinguishable
                    columns = [(training_costs[1:], "Train cost")]
                    if validation_set is not None:
                        columns.append((val_costs[1:], "Val cost"))
                    ax = plot_costs(None, *columns)
                    # Add a line at the most recent best val cost
                    ax.axvline(float(best_iter), color="b")
                    ax.text(float(best_iter + 1) + 0.1,
                            best_val_cost * 1.1,
                            "Best val cost",
                            color="b")
                    from matplotlib import pyplot as plt
                    plt.savefig(cost_plot_filename)

                if validation_set is not None:
                    identity_ratio = identity_ratio_fn(validation_set)
                    log.info("Validation identity ratio = %.4g" %
                             identity_ratio)

                if iteration_callback is not None:
                    # Not computing training error at the moment
                    iteration_callback(i, training_costs[-1], val_cost, 0.0,
                                       best_iter)

                # Check the proportional change between this iteration's training cost and the last
                if len(training_costs) > 2:
                    training_cost_prop_change = abs(
                        (training_costs[-2] - training_costs[-1]) /
                        training_costs[-2])
                    if training_cost_prop_change < training_cost_prop_change_threshold:
                        # Very small change in training cost - maybe we've converged
                        below_threshold_its += 1
                        if below_threshold_its >= 5:
                            # We've had enough iterations with very small changes: we've converged
                            log.info(
                                "Proportional change in training cost (%g) below %g for five successive iterations: "
                                "converged" %
                                (training_cost_prop_change,
                                 training_cost_prop_change_threshold))
                            break
                        else:
                            log.info(
                                "Proportional change in training cost (%g) below %g for %d successive iterations: "
                                "waiting until it's been low for five iterations"
                                % (training_cost_prop_change,
                                   training_cost_prop_change_threshold,
                                   below_threshold_its))
                    else:
                        # Reset the below threshold counter
                        below_threshold_its = 0

            if best_weights is not None:
                # Use the weights that gave us the best error on the validation set
                layer.set_weights(best_weights)
    def __init__(self,
                 num_features,
                 num_classes,
                 num_hidden_units=100,
                 normalize_features=False,
                 autoencoder=False,
                 hidden_activation_fn=None,
                 initialization='glorot'):
        self.num_features = num_features
        self.num_classes = num_classes
        self.num_hidden_units = num_hidden_units
        self.normalize_features = normalize_features
        if autoencoder:
            raise NotImplementedError(
                "don't use SingleLayerNetwork any more to train an autoencoder. It has "
                "its own implementation, which is better")

        if hidden_activation_fn is None:
            hidden_activation_fn = nnet.sigmoid

        # Set up Theano network for the model
        # Features: (m, num_features)
        self.x = T.matrix("x", dtype="float64")
        # Class labels (integer indices): (m,)
        self.y = T.vector("y", dtype="int64")

        if normalize_features:
            # Divide feature vector by its Euclidean norm before using the values as inputs
            self.inputs = ifelse(T.gt(self.x.sum(), 0), self.x / T.sqrt(
                (self.x**2).sum()), self.x)
        else:
            self.inputs = self.x

        if initialization == 'gaussian':
            # Weights and bias, randomly initialized
            self.w0 = theano.shared(numpy.random.randn(num_features,
                                                       num_hidden_units),
                                    name="w0")
            self.b0 = theano.shared(numpy.random.randn(num_hidden_units),
                                    name="b0")
            self.w1 = theano.shared(numpy.random.randn(num_hidden_units,
                                                       num_classes),
                                    name="w1")
            self.b1 = theano.shared(numpy.random.randn(num_classes), name="b1")
        elif initialization == 'glorot':
            # Use Glorot & Bengio's initialization scheme, where the range of random weights depends on the number of
            # hidden units in this and the previous layer
            unif_width0 = math.sqrt(6.) / math.sqrt(num_features +
                                                    num_hidden_units)
            self.w0 = theano.shared(numpy.random.uniform(
                -unif_width0, unif_width0, (num_features, num_hidden_units)),
                                    name="w0")
            unif_width1 = math.sqrt(6.) / math.sqrt(num_hidden_units +
                                                    num_classes)
            self.w1 = theano.shared(numpy.random.uniform(
                -unif_width1, unif_width1, (num_hidden_units, num_classes)),
                                    name="w1")
            # Initialize biases to 0
            self.b0 = theano.shared(numpy.zeros(num_hidden_units), name="b0")
            self.b1 = theano.shared(numpy.zeros(num_classes), name="b1")
        elif initialization == 'squashed-gaussian':
            # Similar to initializing with a normalized Gaussian, but squashes to std to 1/sqrt(input_nodes)
            std0 = 1. / math.sqrt(num_features)
            self.w0 = theano.shared(numpy.random.normal(
                0., std0, (num_features, num_hidden_units)),
                                    name="w0")
            std1 = 1. / math.sqrt(num_hidden_units)
            self.w1 = theano.shared(numpy.random.uniform(
                0., std1, (num_hidden_units, num_classes)),
                                    name="w1")
            # Initialize biases to 0
            self.b0 = theano.shared(numpy.zeros(num_hidden_units), name="b0")
            self.b1 = theano.shared(numpy.zeros(num_classes), name="b1")
        else:
            raise ValueError(
                "unknown initialization type '%s'. Choose gaussian, squashed-gaussian or glorot"
                % initialization)

        # Parameter
        self.output_bias = T.scalar("output_bias", dtype="int64")
        self.hidden_bias = T.scalar("hidden_bias", dtype="int64")

        # Construct Theano expression graph
        self.hidden_activation = T.dot(self.inputs, self.w0) + \
                                 ifelse(T.gt(self.hidden_bias, 0), self.b0, T.zeros(self.b0.shape))
        self.output_activation = T.dot(hidden_activation_fn(self.hidden_activation), self.w1) + \
                                 ifelse(T.gt(self.output_bias, 0), self.b1, T.zeros(self.b1.shape))
        # Softmax activations to get a probability distribution over the classes
        self.class_probs = nnet.softmax(self.output_activation)

        # The predicted class is that with highest activation (no need to do the softmax for this)
        self.prediction = T.argmax(self.output_activation, axis=1)
        error = T.mean(T.neq(self.prediction, self.y))

        # Compile
        self._predict_fn = theano.function(inputs=[
            self.x,
            Param(self.output_bias, default=1),
            Param(self.hidden_bias, default=1)
        ],
                                           outputs=self.prediction)
        self._prob_fn = theano.function(inputs=[
            self.x,
            Param(self.output_bias, default=1),
            Param(self.hidden_bias, default=1)
        ],
                                        outputs=self.class_probs)
        self._error_fn = theano.function(
            inputs=[
                self.x, self.y,
                Param(self.output_bias, default=1),
                Param(self.hidden_bias, default=1)
            ],
            outputs=error,
        )
        self.hidden_fn = theano.function(
            inputs=[self.x, Param(self.hidden_bias, default=1)],
            outputs=hidden_activation_fn(self.hidden_activation),
        )

        one_hot_predictions = T.eye(num_classes, num_classes,
                                    dtype="int8")[self.prediction]
        one_hot_targets = T.eye(num_classes, num_classes, dtype="int8")[self.y]

        # Average log-prob of correct answer
        # More useful metric than accuracy, since we can see differences even where the right answer's not top
        mean_target_log_prob = T.mean(
            T.log(self.class_probs[T.arange(self.y.shape[0]), self.y]))
        self.mean_log_prob = theano.function(inputs=[
            self.x, self.y,
            Param(self.output_bias, default=1),
            Param(self.hidden_bias, default=1)
        ],
                                             outputs=mean_target_log_prob)

        # Similar thing, but averaged within classes first, then across
        num_targets = one_hot_targets.sum(axis=0)
        has_targets = T.neq(num_targets, 0.)
        per_class_target_log_prob = T.switch(
            has_targets,
            T.sum(T.log(self.class_probs) * one_hot_targets, axis=0) /
            num_targets, 0.)
        self._mean_per_class_target_log_prob = T.sum(
            per_class_target_log_prob) / T.sum(has_targets)
        self.mean_per_class_target_log_prob = theano.function(
            inputs=[
                self.x, self.y,
                Param(self.output_bias, default=1),
                Param(self.hidden_bias, default=1)
            ],
            outputs=self._mean_per_class_target_log_prob)

        ##### F-score computation
        # True positives per output class
        true_pos = T.cast(T.sum(one_hot_predictions & one_hot_targets, axis=0),
                          dtype="float64")
        # Positive targets per output class
        pos = T.cast(T.sum(one_hot_targets, axis=0), dtype="float64")
        # Predicted positives per output class
        predicted_pos = T.cast(T.sum(one_hot_predictions, axis=0),
                               dtype="float64")

        # If pos==0 (no actual positives) recall is undefined, so mark it as NaN
        # Replacing the denominator with 1 wherever pos==0 just avoids the division by zero; the NaN numerator wins
        recalls = T.switch(T.eq(pos, 0), float('nan'), true_pos) / T.switch(
            T.eq(pos, 0), 1., pos)
        # Wherever predicted_pos==0 the numerator is set directly (NaN or 0), so a denominator of 1 is safe
        precisions = T.switch(
            T.eq(predicted_pos, 0) & T.eq(pos, 0),
            float('nan'),  # Don't penalize precision if there are no positives
            true_pos / T.switch(T.eq(predicted_pos, 0), 1., predicted_pos))
        f_scores = T.switch(
            T.isnan(precisions) | T.isnan(recalls),
            float('nan'),
            2. * precisions * recalls /
            T.switch(precisions + recalls > 0, precisions + recalls, 1.),
        )
        self._precisions_fn = theano.function(
            inputs=[self.x, self.y],
            outputs=precisions,
            givens=[(self.output_bias, 1), (self.hidden_bias, 1)],
        )
        self._recalls_fn = theano.function(
            inputs=[self.x, self.y],
            outputs=recalls,
            givens=[(self.output_bias, 1), (self.hidden_bias, 1)],
        )
        self._f_score_fn = theano.function(
            inputs=[self.x, self.y],
            outputs=[f_scores, precisions, recalls],
            givens=[(self.output_bias, 1), (self.hidden_bias, 1)],
        )
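# Hedged usage sketch, not part of the example above: `net` is assumed to be an instance of the
# class, built with num_features=10 and num_classes=3. The Param defaults mean the bias switches
# can be omitted entirely, or passed by the names of their Theano variables to zero a bias out.
import numpy
xs = numpy.random.randn(5, 10)
preds = net._predict_fn(xs)                # output_bias and hidden_bias both default to 1
probs = net._prob_fn(xs, output_bias=0)    # class probabilities with the output bias zeroed
hidden = net.hidden_fn(xs, hidden_bias=0)  # hidden activations without the hidden bias
print preds
print probs.shape, hidden.shape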
Esempio n. 21
0
    def __init__(self, sequence_length=5):
        self.sequence_length = sequence_length
        self.num_updates = 0
        layers = []
        X = T.matrix().reshape((sequence_length, 512))
        Y = T.ivector()

        is_train = T.scalar()

        inputs = InputLayer(X, name="input")
        layers.append(inputs)

        #drop1 = DropoutLayer(inputs,is_train, p = 0.5, name="Drop 1, p = 0.5")

        lstm1 = LSTMLayer(inputs,
                          512,
                          512,
                          name="LSTM 1",
                          return_sequences=False)

        #lstm2 = LSTMLayer(drop1,512,512,name="LSTM 2",return_sequences=True)

        #drop2 = DropoutLayer(lstm1,is_train, p = 0.5, name="Drop 2, p = 0.5")

        #lstm3 = LSTMLayer(drop2,512,250,name="LSTM 3",return_sequences=False)

        softmax1 = SoftmaxLayer(lstm1, name="softmax")

        layers += [lstm1, softmax1]
        # layers += [lstm1, drop1, drop2, lstm3, softmax1]
        #layers += [lstm1, drop1, lstm2, drop2, lstm3, softmax1]

        predicted_class = T.argmax(softmax1.output())

        cost = CategoricalCrossEntropy(softmax1, Y).output()

        self.layers = layers

        params = get_params(self.layers)
        biases = get_biases(self.layers)

        caches_params = make_caches(params)
        caches_bias = make_caches(biases)
        eta = T.scalar()

        updates = momentum(cost, params, biases, caches_params, caches_bias,
                           eta)

        self.train = theano.function([X, Y, eta, Param(is_train, 1)],
                                     [cost, predicted_class],
                                     updates=updates,
                                     allow_input_downcast=True,
                                     on_unused_input='warn')

        self.predict = theano.function([X, Param(is_train, 0)],
                                       predicted_class,
                                       allow_input_downcast=True,
                                       on_unused_input='warn')

        self.predict_with_drop = theano.function([X, Param(is_train, 1)],
                                                 predicted_class,
                                                 allow_input_downcast=True,
                                                 on_unused_input='warn')

        self.validate = theano.function([X, Y, Param(is_train, 0)],
                                        cost,
                                        allow_input_downcast=True,
                                        on_unused_input='warn')
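# Hedged usage sketch, not part of the example above: the enclosing class is assumed to be called,
# say, SequenceClassifier, and 512-dimensional frame sequences with an integer label are assumed
# to be available. Because is_train is wired in as Param(is_train, 1) for train/predict_with_drop
# and Param(is_train, 0) for predict/validate, callers never have to pass the flag themselves.
import numpy
model = SequenceClassifier(sequence_length=5)
X = numpy.random.randn(5, 512)
Y = numpy.array([3], dtype=numpy.int32)
cost, predicted = model.train(X, Y, 0.01)   # eta passed positionally; is_train defaults to 1
label = model.predict(X)                    # is_train defaults to 0 (dropout, if enabled, off)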
Esempio n. 22
0
'''
Computing multiple outputs with a single function
'''
from theano import function, Param
import theano.tensor as T

a, b = T.dmatrices('a', 'b')
diff = a - b
abs_diff = abs(a - b)
diff_sq = diff**2
diffs = function([a, b], [diff, abs_diff, diff_sq])
print diffs([[0, 1], [1, 2]], [[-1, 2], [5, 7]])
#print pp(diff)
#print pp(abs_diff)
'''
Setting a default value for an argument
If the arg is not given, the default value is used; otherwise the given value is used
'''
x, y = T.dscalars("x", "y")
z = x + y
add = function([x, Param(y, default=1)], z)
print add(33.0)
print add(2, 6)
'''
Setting names to parameters
'''
x, y, w = T.dscalars("x", "y", "w")
z = (x + y) * w
add_par = function(
    [x, Param(y, default=1),
     Param(w, default=2, name="debalu")], z)
print add_par(33)
print add_par(33, 6, debalu=5)
Esempio n. 23
0
    def train(self,
              xs,
              ys,
              iterations=10000,
              iteration_callback=None,
              validation_xs=None,
              validation_ys=None,
              validation_frequency=1,
              learning_rate=0.1,
              regularization=0.01,
              plot_errors=None,
              plot_cost=None):
        """
        Train on data stored in Theano tensors.

        E.g.
        xs = rng.randn(N, num_features)
        ys = rng.randint(size=N, low=0, high=2)

        iteration_callback is called after each iteration with args
        (iteration, training cost, training error, validation error, validation improvement).

        """
        learning_rate_var = T.scalar("alpha")
        # Compute the training function
        _train_fn = theano.function(
            inputs=[
                self.x, self.y,
                Param(learning_rate_var, default=0.1),
                Param(self.reg_coef, default=0.01)
            ],
            outputs=self._cost_without_reg,
            updates=[(self.theta, self.theta - learning_rate_var * self.gtheta)
                     ],
        )

        best_validation_error = numpy.inf

        validation_errors = []
        training_errors = []
        costs = []

        for i in range(iterations):
            training_cost = _train_fn(xs,
                                      ys,
                                      alpha=learning_rate,
                                      reg=regularization)

            # Only evaluate on val set every validation_frequencyth iteration
            if validation_xs is not None and (i +
                                              1) % validation_frequency == 0:
                # Compute accuracy on validation set
                validation_error = self.error(validation_xs, validation_ys)
                # Compute accuracy on training set
                training_error = self.error(xs, ys)
                # Compute how much we've improved on the previous best validation error
                if validation_error < best_validation_error:
                    # New best: report the improvement as a percentage and update the best error
                    if numpy.isfinite(best_validation_error):
                        validation_improvement = (
                            best_validation_error -
                            validation_error) / best_validation_error * 100.0
                    else:
                        validation_improvement = 0.0
                    best_validation_error = validation_error
                else:
                    validation_improvement = 0.0
            else:
                validation_error = None
                validation_improvement = None
                training_error = None

            if iteration_callback is not None:
                iteration_callback(i, training_cost, training_error,
                                   validation_error, validation_improvement)

            # Plot some graphs
            if plot_cost:
                costs.append(training_cost)
                plot_costs(plot_cost, (costs, "training cost"))
            if plot_errors and validation_error is not None:
                validation_errors.append(validation_error)
                training_errors.append(training_error)
                plot_costs(plot_errors,
                           (training_errors, "training set error"),
                           (validation_errors, "val set error"))
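# Hedged sketch, not part of the example above: a minimal iteration_callback matching the five
# arguments the loop passes (iteration, training cost, training error, validation error,
# validation improvement). `model` stands in for an instance of the enclosing class.
def log_progress(iteration, training_cost, training_error, validation_error, validation_improvement):
    if validation_error is not None:
        print "iter %d: cost=%.4f, train err=%.4f, val err=%.4f (%.2f%% improvement)" % (
            iteration, training_cost, training_error, validation_error, validation_improvement)
    else:
        print "iter %d: cost=%.4f" % (iteration, training_cost)

#model.train(xs, ys, iterations=500, iteration_callback=log_progress,
#            validation_xs=val_xs, validation_ys=val_ys, learning_rate=0.05)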
Esempio n. 24
0
import theano
import theano.tensor as T
from theano import Param
from theano.tensor.shared_randomstreams import RandomStreams

x = T.dmatrix('x')
s = 1 / (1 + T.exp(-x))
logistic = theano.function([x], s)
y = logistic([[0, 1], [-1, -2]])
#print y

x, y, w = T.dscalars('x', 'y', 'w')
z = (x + y) * w
f = theano.function([x, Param(y, default=1), Param(w, default=2)], z)
print f(9, w=1, y=2)

srng = RandomStreams(seed=234)
rv_u = srng.uniform((2, 2))   # symbolic 2x2 matrix of uniform draws
rv_n = srng.normal((2, 2))    # symbolic 2x2 matrix of normal draws
f = theano.function([], rv_u)
# rv_u is drawn only once per call, so the three terms share the same sample and z() is all zeros
z = theano.function([], rv_u + rv_u - 2 * rv_u)
print z      # the compiled Function object
print z()    # a 2x2 matrix of (exact) zeros
print rv_u   # the symbolic random variables, not their values
print rv_n
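# Hedged addition, not in the original snippet: a fresh draw is made on every call to a compiled
# function, whereas no_default_updates=True freezes the random state so every call repeats the
# same draw.
f_fresh = theano.function([], rv_u)
f_frozen = theano.function([], rv_u, no_default_updates=True)
print f_fresh()    # two different 2x2 samples
print f_fresh()
print f_frozen()   # the same 2x2 sample twice
print f_frozen()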
Esempio n. 25
0
    def train(self,
              xs,
              iterations=10000,
              iteration_callback=None,
              batch_size=20,
              batch_callback=None,
              validation_set=None,
              stopping_iterations=10,
              log=None,
              cost_plot_filename=None,
              training_cost_prop_change_threshold=0.0005,
              learning_rate=0.1,
              regularization=None,
              class_weights=None,
              corruption_level=0.,
              continuous_corruption=False,
              loss="xent"):
        """
        Train on data stored in Theano tensors. Uses minibatch training.

        xs are the vectors to train on. Targets needn't be given, since the input and output are the
        same in an autoencoder.

        iteration_callback is called after each iteration with args
        (iteration, training cost, validation cost, training error (currently always 0.0), best iteration).

        If a validation set (matrix) is given, it is used to compute an error after each iteration
        and to enforce a stopping criterion. The algorithm will terminate if it goes stopping_iterations
        iterations without an improvement in validation error.

        The algorithm will assume it has converged and stop early if the proportional change between successive
        training costs drops below training_cost_prop_change_threshold for five iterations in a row.

        Uses L2 regularization.

        Several params are included just to implement the same interface as single_hidden_layer.
        Might want to change this later to be a bit neater.

        """
        if log is None:
            log = get_console_logger("Autoencoder train")

        log.info(
            "Training params: learning rate=%s, noise ratio=%.1f%% (%s), regularization=%.2f"
            % (learning_rate, self.network.corruption_level * 100.0,
               "continuous corruption" if self.network.continuous_corruption
               else "zeroing corruption", regularization))
        log.info("Training with SGD, batch size=%d" % batch_size)

        if class_weights is None:
            # Don't apply any weighting
            class_weights_vector = None
        elif class_weights == "freq":
            # Apply inverse frequency weighting
            class_counts = numpy.maximum(xs.sum(axis=0), 1.0)
            class_weights_vector = 1. / class_counts
            class_weights_vector *= xs.shape[1] / class_weights_vector.sum()
            log.info(
                "Using inverse frequency class weighting in cost function")
        elif class_weights == "log":
            class_counts = numpy.maximum(xs.sum(axis=0), 1.0)
            class_weights_vector = 1. / (numpy.log(class_counts) + 1.)
            class_weights_vector *= xs.shape[1] / class_weights_vector.sum()
            log.info(
                "Using inverse log frequency class weighting in cost function")
        else:
            raise ValueError("invalid class weighting '%s'" % class_weights)

        ######## Compile functions
        # Prepare cost/update functions for training
        cost, updates = self.network.get_cost_updates(
            self.learning_rate,
            self.regularization,
            class_cost_weights=class_weights_vector,
            corruption_level=corruption_level,
            continuous_corruption=continuous_corruption,
            loss=loss)
        # Prepare training functions
        cost_fn = theano.function(
            inputs=[self.network.x,
                    Param(self.regularization, default=0.0)],
            outputs=cost,
        )
        train_fn = theano.function(
            inputs=[
                self.network.x,
                Param(self.learning_rate, default=0.1),
                Param(self.regularization, default=0.0)
            ],
            outputs=cost,
            updates=updates,
        )
        # Prepare a function to test how close to the identity function the learned mapping is
        # A lower value indicates that it's generalizing more (though not necessarily better)
        identity_ratio = T.mean(
            T.sum(self.network.get_prediction_dist() * (self.network.x > 0),
                  axis=1))
        identity_ratio_fn = theano.function(inputs=[self.network.x],
                                            outputs=identity_ratio)
        ###########

        # Throw away ys in validation set
        validation_set = validation_set[0]

        # Prepare a prediction validation set by holding one event out of every chain in the val set
        prediction_targets = numpy.array([
            random.choice(numpy.where(x_row > 0)[0])
            for x_row in validation_set
        ],
                                         dtype=numpy.int16)
        prediction_contexts = validation_set.copy()
        prediction_contexts[range(prediction_contexts.shape[0]),
                            prediction_targets] = 0.
        prediction_balanced_sample = balanced_array_sample(prediction_targets,
                                                           balance_ratio=4.,
                                                           min_inclusion=1)
        prediction_targets = prediction_targets[prediction_balanced_sample]
        prediction_contexts = prediction_contexts[prediction_balanced_sample]
        log.info(
            "Prepared roughly balanced prediction set from validation set with %d examples"
            % prediction_contexts.shape[0])

        # Work out how many batches to do
        if batch_size is None or batch_size == 0:
            num_batches = 1
        else:
            num_batches = xs.shape[0] / batch_size
            if xs.shape[0] % batch_size != 0:
                num_batches += 1

        # Keep a record of costs, so we can plot them
        val_costs = []
        training_costs = []

        # Compute costs using the initialized network
        training_cost = cost_fn(xs)
        training_costs.append(training_cost)
        if validation_set is not None:
            val_cost = cost_fn(validation_set)
            val_costs.append(val_cost)
        else:
            val_cost = None

        log.info("Computing initial validation scores")
        f_score, precision, recall, f_score_classes = self.compute_f_scores(
            validation_set)
        log.info("F-score: %.4f%% (mean over %d classes), P=%.4f%%, R=%.4f%%" %
                 (f_score * 100.0, f_score_classes, precision * 100.0,
                  recall * 100.0))
        log_prob = self.network.prediction_log_prob(prediction_contexts,
                                                    prediction_targets)
        log.info("Logprob = %.4g" % log_prob)
        gen_log_prob = self.network.generalization_log_prob(
            prediction_contexts, prediction_targets)
        log.info("Generalization logprob = %.4g" % gen_log_prob)
        identity_ratio = identity_ratio_fn(validation_set)
        log.info("Identity ratio = %.4g" % identity_ratio)

        # Keep a copy of the best weights so far
        best_weights = best_iter = best_val_cost = None
        if validation_set is not None:
            best_weights = self.network.get_weights()
            best_iter = -1
            best_val_cost = val_cost

        below_threshold_its = 0

        for i in range(iterations):
            # Shuffle the training data between iterations, as one should with SGD
            shuffle = numpy.random.permutation(xs.shape[0])
            xs[:] = xs[shuffle]

            err = 0.0
            if num_batches > 1:
                for batch in range(num_batches):
                    # Update the model with this batch's data
                    batch_err = train_fn(xs[batch * batch_size:(batch + 1) *
                                            batch_size],
                                         learning_rate=learning_rate,
                                         regularization=regularization)
                    err += batch_err

                    if batch_callback is not None:
                        batch_callback(batch, num_batches, batch_err)
            else:
                # Full-batch training: a single update over the whole data set per iteration
                train_fn(xs,
                         learning_rate=learning_rate,
                         regularization=regularization)

            # Go back and compute training cost
            training_cost = cost_fn(xs)
            training_costs.append(training_cost)

            if validation_set is not None:
                # Compute the cost function on the validation set
                val_cost = cost_fn(validation_set)
                val_costs.append(val_cost)
                if val_cost <= best_val_cost:
                    # We assume that, if the validation error remains the same, it's better to use the new set of
                    # weights (with, presumably, a better training error)
                    if val_cost == best_val_cost:
                        log.info(
                            "Same validation cost: %.4f, using new weights" %
                            val_cost)
                    else:
                        log.info("New best validation cost: %.4f" % val_cost)
                    # Update our best estimate
                    best_weights = self.network.get_weights()
                    best_iter = i
                    best_val_cost = val_cost
                if val_cost >= best_val_cost and i - best_iter >= stopping_iterations:
                    # We've gone on long enough without improving validation error
                    # Time to call a halt and use the best validation error we got
                    log.info(
                        "Stopping after %d iterations of increasing validation cost"
                        % stopping_iterations)
                    break

            log.info(
                "COMPLETED ITERATION %d: training cost=%.5f, val cost=%.5f" %
                (i, training_cost, val_cost))

            if cost_plot_filename:
                # Plot the cost function as we train
                # Skip the first costs, as they're usually so much higher than others that the rest is indistinguishable
                columns = [(training_costs[1:], "Train cost")]
                if validation_set is not None:
                    columns.append((val_costs[1:], "Val cost"))
                ax = plot_costs(None, *columns)
                # Add a line at the most recent best val cost
                ax.axvline(float(best_iter), color="b")
                ax.text(float(best_iter + 1) + 0.1,
                        best_val_cost * 1.1,
                        "Best val cost",
                        color="b")
                plt.savefig(cost_plot_filename)

            f_score, precision, recall, f_score_classes = self.compute_f_scores(
                validation_set)
            log.info(
                "Validation f-score: %.4f%% (mean over %d classes), P=%.4f%%, R=%.4f%%"
                % (f_score * 100.0, f_score_classes, precision * 100.0,
                   recall * 100.0))
            #log_prob = self.network.prediction_log_prob(prediction_contexts, prediction_targets)
            #log.info("Prediction logprob = %.4g" % log_prob)
            gen_log_prob = self.network.generalization_log_prob(
                prediction_contexts, prediction_targets)
            log.info("Generalization logprob = %.4g" % gen_log_prob)
            identity_ratio = identity_ratio_fn(validation_set)
            log.info("Validation identity ratio = %.4g" % identity_ratio)

            if iteration_callback is not None:
                # Not computing training error at the moment
                iteration_callback(i, training_cost, val_cost, 0.0, best_iter)

            # Check the proportional change between this iteration's training cost and the last
            if len(training_costs) > 2:
                training_cost_prop_change = abs(
                    (training_costs[-2] - training_costs[-1]) /
                    training_costs[-2])
                if training_cost_prop_change < training_cost_prop_change_threshold:
                    # Very small change in training cost - maybe we've converged
                    below_threshold_its += 1
                    if below_threshold_its >= 5:
                        # We've had enough iterations with very small changes: we've converged
                        log.info(
                            "Proportional change in training cost (%g) below %g for five successive iterations: "
                            "converged" %
                            (training_cost_prop_change,
                             training_cost_prop_change_threshold))
                        break
                    else:
                        log.info(
                            "Proportional change in training cost (%g) below %g for %d successive iterations: "
                            "waiting until it's been low for five iterations" %
                            (training_cost_prop_change,
                             training_cost_prop_change_threshold,
                             below_threshold_its))
                else:
                    # Reset the below threshold counter
                    below_threshold_its = 0

        if best_weights is not None:
            # Use the weights that gave us the best error on the validation set
            self.network.set_weights(best_weights)
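# Hedged usage sketch, not part of the original method: all names here are assumptions. `trainer`
# stands in for an instance of the enclosing trainer class and `data` / `val_data` for dense numpy
# arrays of event vectors. Note that validation_set is expected to be a tuple whose first element
# is the matrix, since the ys are thrown away inside train().
#
# trainer.train(data,
#               iterations=200,
#               batch_size=64,
#               validation_set=(val_data, None),
#               stopping_iterations=10,
#               learning_rate=0.1,
#               regularization=0.01,
#               class_weights="log",
#               corruption_level=0.2,
#               loss="xent")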