Example #1
 def default_training(self):
     input_var = tensor.tensor4('inputs')
     target_var = tensor.ivector('targets')
     loss, _ = loss_acc(self.model,
                        input_var,
                        target_var,
                        deterministic=False)
     loss += regularize_layer_params(get_all_layers(self.model),
                                     l2,
                                     tags={
                                         'regularizable': True,
                                         'layer_weight': False
                                     }) * 1e-4
     # TODO : does this count as weight decay (...*1e-4) or not?
     # the learning rate is 1/100 of the normal learning rate
     # ... but we just adapt the decay
     loss += regularize_layer_params(
         get_all_layers(self.model), l2, tags={'layer_weight': True}) * 1e-6
     params = get_all_params(self.model, trainable=True)
     # updates = adam(loss, params, learning_rate=self.learning_rate)
     updates = self.momentum_method(loss,
                                    params,
                                    momentum=self.momentum,
                                    learning_rate=self.learning_rate)
     for weight in get_all_params(self.model,
                                  trainable=True,
                                  tags={'layer_weight': True}):
         # all residual weights are in [-1, 1]
         assert weight in updates
         updates[weight] = tensor.minimum(
             1.0, tensor.maximum(-1.0, updates[weight]))
     self.set_training(input_var, target_var, loss, updates)
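
A small sketch (assuming Theano is available) showing that the nested minimum/maximum clipping above is equivalent to tensor.clip, which some may find more readable; the values below are made up for illustration:

import numpy as np
import theano
import theano.tensor as tensor

x = tensor.vector('x')
clipped_a = tensor.minimum(1.0, tensor.maximum(-1.0, x))
clipped_b = tensor.clip(x, -1.0, 1.0)

vals = np.array([-2.0, -0.5, 0.0, 0.7, 3.0], dtype=theano.config.floatX)
print(clipped_a.eval({x: vals}))  # [-1.  -0.5  0.   0.7  1. ]
print(clipped_b.eval({x: vals}))  # identical
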
Example #2
def objective(layers,
              loss_function,
              target,
              aggregate=aggregate,
              deterministic=False,
              l1=0,
              l2=0,
              get_output_kw=None):
    """
    Default implementation of the NeuralNet Objective.
    :param layers: The underlying layers of the NeuralNetwork
    :param loss_function: The callable loss function to use
    :param target: the expected output

    :param aggregate: the aggregation function to use
    :param deterministic: Whether or not to get a deterministic output
    :param l1: Optional l1 regularization parameter
    :param l2: Optional l2 regularization parameter
    :param get_output_kw: optional kwargs to pass to :meth:`NeuralNetwork.get_output`
    :return: The total calculated loss
    """
    if get_output_kw is None:
        get_output_kw = {}
    output_layer = layers[-1]
    network_output = get_output(
        output_layer, deterministic=deterministic, **get_output_kw)
    loss = aggregate(loss_function(network_output, target))

    if l1:
        loss += regularization.regularize_layer_params(
            layers.values(), regularization.l1) * l1
    if l2:
        loss += regularization.regularize_layer_params(
            layers.values(), regularization.l2) * l2
    return loss
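
A minimal sketch (assuming Lasagne and Theano are installed; the tiny network below is made up for illustration) of what the l2 term above evaluates to: regularize_layer_params sums the penalty over every parameter tagged 'regularizable', i.e. the weight matrices but not the biases.

import numpy as np
from lasagne.layers import InputLayer, DenseLayer, get_all_layers, get_all_params
from lasagne.regularization import regularize_layer_params, l2

l_in = InputLayer((None, 4))
l_hid = DenseLayer(l_in, num_units=3)
l_out = DenseLayer(l_hid, num_units=2)

penalty = regularize_layer_params(get_all_layers(l_out), l2)  # Theano scalar

# the same value computed by hand from the current weight values
expected = sum(np.sum(p.get_value() ** 2)
               for p in get_all_params(l_out, regularizable=True))
print(np.allclose(penalty.eval(), expected))  # True
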
Example #3
def objective(layers,
              loss_function,
              target,
              aggregate=aggregate,
              aggregation_weights=None,
              deterministic=False,
              l1=0,
              l2=0,
              get_output_kw=None):
    if get_output_kw is None:
        get_output_kw = {}

    output_layer = layers[-1]
    network_output = get_output(
        output_layer, deterministic=deterministic, **get_output_kw)
    if isfunction(aggregation_weights):
        weights = aggregation_weights(layers)
    else:
        weights = aggregation_weights
    loss = aggregate(loss_function(network_output, target), weights)

    if l1:
        loss += regularization.regularize_layer_params(
            layers.values(), regularization.l1) * l1
    if l2:
        loss += regularization.regularize_layer_params(
            layers.values(), regularization.l2) * l2
    return loss
Example #4
def cost_network(network, target_var, l1_reg, l2_reg, learn, train_layers=[], output_layer=[]):
	#for key in dont_train:
	#	network[key].params[network[key].W].remove("trainable")
	#	network[key].params[network[key].b].remove("trainable")
	#Basic loss is negative loss likelihood
	network_out = network[output_layer]
	prediction = lasagne.layers.get_output(network_out)
	loss = T.mean(T.nnet.categorical_crossentropy(prediction,target_var))
	#Shared costs
	l1_penalty = regularize_layer_params(network_out, l1) * l1_reg
	l2_penalty = regularize_layer_params(network_out, l2) * l2_reg
	cost = loss + l2_penalty + l1_penalty
	
	#params = lasagne.layers.get_all_params(network_out, trainable=True)
	#print(params)
	params=[]
	for p in train_layers:
		params.append(network[p].get_params(trainable=True))
	
	params = [item for sublist in params for item in sublist]
	
	print([i.eval().shape for i in params])
	print(params)	
	print(train_layers)
	print("----")
	updates = lasagne.updates.sgd(cost, params, learning_rate=learn)
	return([cost, updates, loss])
Example #5
def objective(layers,
              loss_function,
              target,
              aggregate=aggregate,
              deterministic=False,
              l1=0,
              l2=0,
              tv=0,
              get_output_kw=None):
    if get_output_kw is None:
        get_output_kw = {}
    output_layer = layers[-1]
    network_output = get_output(
        output_layer, deterministic=deterministic, **get_output_kw)
    loss = aggregate(loss_function(network_output, target))

    if l1:
        loss += regularization.regularize_layer_params(
            layers[-2], regularization.l1) * l1
    if l2:
        loss += regularization.regularize_layer_params(
            layers[-2], regularization.l2) * l2
    if tv:
        loss += T.mean(T.abs_(network_output[:, 1:] -
                              network_output[:, :-1]))*tv
    return loss
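
The tv argument above adds a total-variation style penalty on adjacent entries of the network output; a numpy-only sketch of the quantity it computes, with made-up values:

import numpy as np

out = np.array([[0.1, 0.4, 0.3],
                [0.0, 0.0, 1.0]])
tv_term = np.mean(np.abs(out[:, 1:] - out[:, :-1]))
print(tv_term)  # mean of 0.3, 0.1, 0.0, 1.0 = 0.35
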
Example #6
 def compile_logistic_model(self, lamda, input_params=None):
     X,Y = self.X,self.Y
     net = self.build_model(X)
     network = net['l_out']
     self.net_logistic = network
     prediction = lasagne.layers.get_output(network)
     loss = lasagne.objectives.categorical_crossentropy(prediction, Y)
     loss = loss.mean() 
     for key in net.keys():
         loss += lamda*regularize_layer_params(net[key], l2) + \
                 lamda*regularize_layer_params(net[key], l1) 
     if input_params:
         print"Compiling classifier with input params..."
         lasagne.layers.set_all_param_values( network,
                                              [i.get_value() for i in input_params])
     params = lasagne.layers.get_all_params(network)
     self.inst_params = params
     updates = lasagne.updates.nesterov_momentum(
         loss, params, learning_rate=0.01, momentum=0.9)
     
     test_prediction = lasagne.layers.get_output(network, deterministic=True)
     test_prediction = T.argmax(test_prediction, axis=1)
     train = theano.function([X, Y], loss, updates=updates, allow_input_downcast=True)      
     predict = theano.function([X], test_prediction, allow_input_downcast=True)
     print "Done Compiling logistic model..."
     return train,predict
Example #7
def objective(layers,
              loss_function,
              target,
              aggregate=aggregate,
              mode='mean',
              weights=None,
              deterministic=False,
              l1=0,
              l2=0,
              l3=0,
              l3_layers=[],
              get_output_kw=None):
    if get_output_kw is None:
        get_output_kw = {}
    output_layer = layers[-1]
    network_output = get_output(
        output_layer, deterministic=deterministic, **get_output_kw)
    loss = aggregate(loss_function(network_output, target), weights=weights, mode=mode)

    if l1:
        loss += regularization.regularize_layer_params(
            layers.values(), regularization.l1) * l1
    if l2:
        loss += regularization.regularize_layer_params(
            layers.values(), regularization.l2) * l2
    if l3:
        for layer in l3_layers:
            loss += regularization.regularize_layer_params(
                layer, regularization.l2) * l3
    return loss
Example #8
def objective(layers,
              loss_function,
              target,
              aggregate=aggregate,
              aggregation_weights=None,
              deterministic=False,
              l1=0,
              l2=0,
              get_output_kw=None):
    if get_output_kw is None:
        get_output_kw = {}

    output_layer = layers[-1]
    network_output = get_output(output_layer,
                                deterministic=deterministic,
                                **get_output_kw)
    if isfunction(aggregation_weights):
        weights = aggregation_weights(layers)
    else:
        weights = aggregation_weights
    loss = aggregate(loss_function(network_output, target), weights)

    if l1:
        loss += regularization.regularize_layer_params(layers.values(),
                                                       regularization.l1) * l1
    if l2:
        loss += regularization.regularize_layer_params(layers.values(),
                                                       regularization.l2) * l2
    return loss
Example #9
    def build_qn_type_model(self, from_scratch=False):
        qtype, qembd = self.qtype, self.qembd
        qX, mask = self.qX, self.lstm_mask
        if from_scratch:
            #q_bow_net = self.build_question_boW(qX)
            #q_bow = lasagne.layers.get_output(q_bow_net['l_embd'])
            #l2_penalty_qbow = regularize_layer_params(q_bow_net['l_embd'], l2)
            #qbow_params = lasagne.layers.get_all_params(q_bow_net['l_embd'])
            #qembd = T.sum(q_bow,axis=1)
            q_lstm_net = self.build_qn_classifier_lstm(qX, mask)
            qlstm_params = lasagne.layers.get_all_params(q_lstm_net['l_dense'])
            l2_penalty_qlstm = regularize_layer_params(q_lstm_net['l_dense'],
                                                       l2)
            #l2_penalty_qlstm += regularize_layer_params(q_lstm_net['l_lstm'], l2)
            qembd = lasagne.layers.get_output(q_lstm_net['l_dense'])
        q_type_net = self.build_qn_classifier_mlp(qembd)
        q_type_pred = lasagne.layers.get_output(q_type_net['l_out'],
                                                deterministic=False)
        l2_penalty_mlp = regularize_layer_params(q_type_net['l_out'], l2)
        loss = lasagne.objectives.categorical_crossentropy(q_type_pred, qtype)
        loss = loss.mean() + l2_penalty_mlp
        loss += l2_penalty_qlstm
        params = []
        qmlp_params = lasagne.layers.get_all_params(q_type_net['l_out'])
        for p in qmlp_params:
            params.append(p)
        for p in qlstm_params:
            params.append(p)
        all_grads = T.grad(loss, params)
        if self.grad_clip is not None:
            all_grads = [
                T.clip(g, self.grad_clip[0], self.grad_clip[1])
                for g in all_grads
            ]

        updates = lasagne.updates.adam(all_grads, params, learning_rate=0.003)
        qtype_test_pred = lasagne.layers.get_output(q_type_net['l_out'],
                                                    deterministic=True)
        qtype_test_pred = T.argmax(qtype_test_pred, axis=1)
        print "Compiling..."
        self.timer.set_checkpoint('compile')
        if from_scratch:
            train = theano.function([qX, mask, qtype],
                                    loss,
                                    updates=updates,
                                    allow_input_downcast=True)
            qtype_predict = theano.function([qX, mask],
                                            qtype_test_pred,
                                            allow_input_downcast=True)
        else:
            train = theano.function([qembd, qtype],
                                    loss,
                                    updates=updates,
                                    allow_input_downcast=True)
            qtype_predict = theano.function([qembd],
                                            qtype_test_pred,
                                            allow_input_downcast=True)
        print "Compile time(mins)", self.timer.print_checkpoint('compile')
        print "Done Compiling qtype model..."
        return train, qtype_predict
Example #10
    def compile_logistic_model(self, lamda, input_params=None):
        X, Y = self.X, self.Y
        net = self.build_model(X)
        network = net['l_out']
        self.net_logistic = network
        prediction = lasagne.layers.get_output(network)
        loss = lasagne.objectives.categorical_crossentropy(prediction, Y)
        loss = loss.mean()
        for key in net.keys():
            loss += lamda*regularize_layer_params(net[key], l2) + \
                    lamda*regularize_layer_params(net[key], l1)
        if input_params:
            print "Compiling classifier with input params..."
            lasagne.layers.set_all_param_values(
                network, [i.get_value() for i in input_params])
        params = lasagne.layers.get_all_params(network)
        self.inst_params = params
        updates = lasagne.updates.nesterov_momentum(loss,
                                                    params,
                                                    learning_rate=0.01,
                                                    momentum=0.9)

        test_prediction = lasagne.layers.get_output(network,
                                                    deterministic=True)
        test_prediction = T.argmax(test_prediction, axis=1)
        train = theano.function([X, Y],
                                loss,
                                updates=updates,
                                allow_input_downcast=True)
        predict = theano.function([X],
                                  test_prediction,
                                  allow_input_downcast=True)
        print "Done Compiling logistic model..."
        return train, predict
Example #11
    def _get_loss_updates(self,
    L1_reg = 0.0, L2_reg = 0.001,
    update_fn = lasagne.updates.nesterov_momentum,
    max_norm = None, deterministic = False,
    momentum = 0.9,
    **kwargs):
        """
        Returns Theano expressions for the network's loss function and parameter
            updates.

        Parameters:
            L1_reg: float for L1 weight regularization coefficient.
            L2_reg: float for L2 weight regularization coefficient.
            max_norm: If not None, constrains the norm of the gradients to be
                less than max_norm.
            deterministic: True or False. Determines if the output of the network
                is calculated deterministically.
            update_fn: lasagne update function.
                Default: Stochastic Gradient Descent with Nesterov momentum
            **kwargs: additional parameters to provide to update_fn.
                For example: momentum

        Returns:
            loss: Theano expression for a penalized negative log likelihood.
            updates: Theano expression to update the parameters using update_fn.
        """

        loss = (
            self._negative_log_likelihood(self.E, deterministic)
            + regularize_layer_params(self.network,l1) * L1_reg
            + regularize_layer_params(self.network, l2) * L2_reg
        )

        if max_norm:
            grads = T.grad(loss,self.params)
            scaled_grads = lasagne.updates.total_norm_constraint(grads, max_norm)
            updates = update_fn(
                scaled_grads, self.params, **kwargs
            )
        else:
            updates = update_fn(
                loss, self.params, **kwargs
            )

        if momentum:
            updates = lasagne.updates.apply_nesterov_momentum(updates, 
                self.params, self.learning_rate, momentum=momentum)

        # If the model was loaded from file, reload params
        if self.restored_update_params:
            for p, value in zip(updates.keys(), self.restored_update_params):
                p.set_value(value)
            self.restored_update_params = None

        # Store last update function to be later saved
        self.updates = updates

        return loss, updates
Example #12
    def _get_loss_updates(self,
                          L1_reg=0.0,
                          L2_reg=0.001,
                          update_fn=lasagne.updates.nesterov_momentum,
                          max_norm=None,
                          deterministic=False,
                          momentum=0.9,
                          **kwargs):
        """
        Returns Theano expressions for the network's loss function and parameter
            updates.

        Parameters:
            L1_reg: float for L1 weight regularization coefficient.
            L2_reg: float for L2 weight regularization coefficient.
            max_norm: If not None, constrains the norm of the gradients to be
                less than max_norm.
            deterministic: True or False. Determines if the output of the network
                is calculated deterministically.
            update_fn: lasagne update function.
                Default: Stochastic Gradient Descent with Nesterov momentum
            **kwargs: additional parameters to provide to update_fn.
                For example: momentum

        Returns:
            loss: Theano expression for a penalized negative log likelihood.
            updates: Theano expression to update the parameters using update_fn.
        """

        loss = (self._negative_log_likelihood(self.E, deterministic) +
                regularize_layer_params(self.network, l1) * L1_reg +
                regularize_layer_params(self.network, l2) * L2_reg)

        if max_norm:
            grads = T.grad(loss, self.params)
            scaled_grads = lasagne.updates.total_norm_constraint(
                grads, max_norm)
            updates = update_fn(scaled_grads, self.params, **kwargs)
        else:
            updates = update_fn(loss, self.params, **kwargs)

        if momentum:
            updates = lasagne.updates.apply_nesterov_momentum(
                updates, self.params, self.learning_rate, momentum=momentum)

        # If the model was loaded from file, reload params
        if self.restored_update_params:
            for p, value in zip(updates.keys(), self.restored_update_params):
                p.set_value(value)
            self.restored_update_params = None

        # Store last update function to be later saved
        self.updates = updates

        return loss, updates
Example #13
    def cost_ELBO(self,
                  Y=None,
                  X=None,
                  padleft=False,
                  sample_strategy='with_symb_noise',
                  regularize_evolution_weights=False):
        """
        """
        if Y is None: Y = self.Y
        if X is None: X = self.X

        mrec = self.get_RecModel()
        mgen = self.get_GenModel()
        postX = self.get_symb_postX(Y, X, sample_strategy)

        if regularize_evolution_weights:
            from lasagne.layers import get_all_layers
            from lasagne.regularization import regularize_layer_params, l2
            lat_ev_layers = get_all_layers(self.lat_ev_model.NNEvolve)
            lat_weights_regloss = regularize_layer_params(lat_ev_layers[1], l2)

        Nsamps = Y.shape[0]
        LogDensity = mgen.compute_LogDensity(Y, postX, padleft=padleft)
        Entropy = mrec.compute_Entropy(Y, postX)
        ELBO = (LogDensity +
                Entropy if not regularize_evolution_weights else LogDensity +
                Entropy + lat_weights_regloss)
        costs_func = theano.function(
            inputs=self.CostsInputDict['ELBO'],
            outputs=[ELBO / Nsamps, LogDensity / Nsamps, Entropy / Nsamps])

        return ELBO, costs_func
Example #14
def define_loss(network, inputs, targets):
    prediction = lasagne.layers.get_output(network)

    loss = lasagne.objectives.categorical_crossentropy(prediction, targets)
    loss = loss.mean()

    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction, targets)
    test_loss = test_loss.mean()

    if params.REGULARIZATION:
        regularization_penalty = regularize_layer_params(network, l2) * params.REGULARIZATION_WEIGHT

        loss = loss + regularization_penalty
        test_loss = test_loss + regularization_penalty

    acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), targets),
                dtype=theano.config.floatX)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([inputs, targets], [test_prediction, test_loss, acc])

    return loss, val_fn
Example #15
    def build_train_loss(self, train_output, target_values):

        l2_penalty = regularize_layer_params(self.layers, l2) * self.l2_reg_weight
        loss = self.msq_err(train_output, target_values)
        loss += l2_penalty

        return loss
Example #16
def build_model(n_input,
                n_hidden,
                optimizer=adagrad,
                l2_weight=1e-4,
                l1_weight=1e-2):
    '''
    Build an NN model for estimating the model function.
    '''
    global LR

    input_A = L.InputLayer((None, n_input), name='A')
    layer_A = L.DenseLayer(input_A, n_hidden, b=None, nonlinearity=identity)

    input_B = L.InputLayer((None, n_input), name='B')
    layer_B = L.DenseLayer(input_B, n_hidden, b=None, nonlinearity=identity)

    merge_layer = L.ElemwiseSumLayer((layer_A, layer_B))

    output_layer = L.DenseLayer(merge_layer, 1, b=None,
                                nonlinearity=identity)  # output is scalar

    x1 = T.matrix('x1')
    x2 = T.matrix('x2')
    y = T.matrix('y')

    out = L.get_output(output_layer, {input_A: x1, input_B: x2})
    params = L.get_all_params(output_layer)
    loss = T.mean(squared_error(out, y))

    # add l1 penalty
    l1_penalty = regularize_layer_params([layer_A, layer_B, output_layer], l1)

    # add l2 penalty
    l2_penalty = regularize_layer_params([layer_A, layer_B, output_layer], l2)

    # get loss + penalties
    loss = loss + l1_penalty * l1_weight + l2_penalty * l2_weight

    updates_sgd = optimizer(loss, params, learning_rate=LR)
    updates = apply_momentum(updates_sgd, params, momentum=0.9)
    # updates = optimizer(loss,params,learning_rate=LR)

    f_train = theano.function([x1, x2, y], loss, updates=updates)
    f_test = theano.function([x1, x2, y], loss)
    f_out = theano.function([x1, x2], out)

    return f_train, f_test, f_out, output_layer
Example #17
def objective_with_L2(layers, loss_function, target, aggregate=aggregate, deterministic=False, get_output_kw=None):
    reg = regularize_layer_params([layers["hidden5"]], l2)
    loss = objective(layers, loss_function, target, aggregate, deterministic, get_output_kw)

    if deterministic is False:
        return loss + reg * lambda_regularization
    else:
        return loss
Example #18
    def build_train_loss(self, train_output, target_values):

        l2_penalty = regularize_layer_params(self.layers, l2) * self.l2_reg_weight
        loss = T.nnet.categorical_crossentropy(
                train_output, target_values).mean()
        loss += l2_penalty

        return loss
Example #19
    def _get_loss_updates(self,
    L1_reg = 0.0, L2_reg = 0.001,
    update_fn = lasagne.updates.nesterov_momentum,
    max_norm = None, deterministic = False,
    **kwargs):
        """
        Returns Theano expressions for the network's loss function and parameter
            updates.

        Parameters:
            L1_reg: float for L1 weight regularization coefficient.
            L2_reg: float for L2 weight regularization coefficient.
            max_norm: If not None, constrains the norm of the gradients to be
                less than max_norm.
            deterministic: True or False. Determines if the output of the network
                is calculated deterministically.
            update_fn: lasagne update function.
                Default: Stochastic Gradient Descent with Nesterov momentum
            **kwargs: additional parameters to provide to update_fn.
                For example: momentum

        Returns:
            loss: Theano expression for a penalized negative log likelihood.
            updates: Theano expression to update the parameters using update_fn.
        """

        loss = (
            self._negative_log_likelihood(self.E, deterministic)
            + regularize_layer_params(self.network,l1) * L1_reg
            + regularize_layer_params(self.network, l2) * L2_reg
        )

        if max_norm:
            grads = T.grad(loss,self.params)
            scaled_grads = lasagne.updates.total_norm_constraint(grads, max_norm)
            updates = update_fn(
                scaled_grads, self.params, **kwargs
            )
            return loss, updates

        updates = update_fn(
                loss, self.params, **kwargs
            )

        return loss, updates
    def test_regularize_layer_params_single_layer(self, layers):
        from lasagne.regularization import regularize_layer_params
        l_1, l_2, l_3 = layers

        penalty = Mock(return_value=0)
        loss = regularize_layer_params(l_2, penalty)

        assert penalty.call_count == 1
        penalty.assert_any_call(l_2.W)
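
A runnable sketch (Lasagne/Theano assumed; the layer below is built just for the example) of the behaviour this test asserts: for a single DenseLayer the penalty is applied only to parameters tagged 'regularizable', so the result is the L1 norm of W and the bias is left out.

import numpy as np
from lasagne.layers import InputLayer, DenseLayer
from lasagne.regularization import regularize_layer_params, l1

l_in = InputLayer((None, 5))
l_dense = DenseLayer(l_in, num_units=3)

penalty = regularize_layer_params(l_dense, l1)
print(np.allclose(penalty.eval(), np.abs(l_dense.W.get_value()).sum()))  # True
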
Example #22
    def build_mlp(self, input_var=None, dropout_rate=0.5, l2_reg=0., l1_reg=0.):
        # This creates an MLP of two hidden layers of 800 units each, followed by
        # a softmax output layer of 10 units. It applies 20% dropout to the input
        # data and 50% dropout to the hidden layers.

        # Input layer, specifying the expected input shape of the network
        # (unspecified batchsize, 1 channel, 28 rows and 28 columns) and
        # linking it to the given Theano variable `input_var`, if any:
        l_in = lasagne.layers.InputLayer(shape=(None, 1, 28, 28),
                                         input_var=input_var)

        # Apply 20% dropout to the input data:
        #l_in_drop = lasagne.layers.DropoutLayer(l_in, p=dropout_rate)

        # Add a fully-connected layer of 800 units, using the linear rectifier, and
        # initializing weights with Glorot's scheme (which is the default anyway):
        l_hid1 = lasagne.layers.DenseLayer(
                l_in, num_units=800,
                nonlinearity=lasagne.nonlinearities.rectify,
                W=lasagne.init.GlorotUniform())

        # We'll now add dropout of 50%:

        self.l2_penalty = regularize_layer_params(l_hid1, l2)
        self.l1_penalty = regularize_layer_params(l_hid1, l1)

        l_hid1_drop = lasagne.layers.DropoutLayer(l_hid1, p=dropout_rate)

        # Another 800-unit layer:
        #l_hid2 = lasagne.layers.DenseLayer(
        #       l_hid1_drop, num_units=800,
        #        nonlinearity=lasagne.nonlinearities.rectify)

        # 50% dropout again:
        #l_hid2_drop = lasagne.layers.DropoutLayer(l_hid2, p=dropout_rate)

        # Finally, we'll add the fully-connected output layer, of 10 softmax units:
        l_out = lasagne.layers.DenseLayer(
                l_hid1_drop, num_units=10,
                nonlinearity=lasagne.nonlinearities.softmax)

        # Each layer is linked to its incoming layer(s), so we only need to pass
        # the output layer to give access to a network in Lasagne:
        return l_out
    def build_qn_type_model(self, from_scratch=False):
        qtype,qembd = self.qtype,self.qembd
        qX, mask =  self.qX, self.lstm_mask
        if from_scratch:
            #q_bow_net = self.build_question_boW(qX)
            #q_bow = lasagne.layers.get_output(q_bow_net['l_embd'])
            #l2_penalty_qbow = regularize_layer_params(q_bow_net['l_embd'], l2)
            #qbow_params = lasagne.layers.get_all_params(q_bow_net['l_embd'])
            #qembd = T.sum(q_bow,axis=1)
            q_lstm_net = self.build_qn_classifier_lstm(qX, mask)
            qlstm_params = lasagne.layers.get_all_params(q_lstm_net['l_dense'])
            l2_penalty_qlstm = regularize_layer_params(q_lstm_net['l_dense'], l2)
            #l2_penalty_qlstm += regularize_layer_params(q_lstm_net['l_lstm'], l2)
            qembd = lasagne.layers.get_output(q_lstm_net['l_dense'])
        q_type_net = self.build_qn_classifier_mlp(qembd)
        q_type_pred = lasagne.layers.get_output(q_type_net['l_out'],deterministic=False)
        l2_penalty_mlp = regularize_layer_params(q_type_net['l_out'], l2)
        loss = lasagne.objectives.categorical_crossentropy(q_type_pred, qtype)
        loss = loss.mean() + l2_penalty_mlp
        loss += l2_penalty_qlstm
        params = []
        qmlp_params = lasagne.layers.get_all_params(q_type_net['l_out'])
        for p in qmlp_params:
            params.append(p)
        for p in qlstm_params:
            params.append(p)
        all_grads = T.grad(loss, params)
        if self.grad_clip is not None:
            all_grads = [T.clip(g, self.grad_clip[0], self.grad_clip[1]) for g in all_grads]

        updates = lasagne.updates.adam(all_grads, params, learning_rate=0.003)
        qtype_test_pred = lasagne.layers.get_output(q_type_net['l_out'],deterministic=True)
        qtype_test_pred = T.argmax(qtype_test_pred, axis=1)
        print "Compiling..."
        self.timer.set_checkpoint('compile')
        if from_scratch:
            train = theano.function([qX,mask, qtype], loss, updates=updates, allow_input_downcast=True)
            qtype_predict = theano.function([qX,mask], qtype_test_pred, allow_input_downcast=True)
        else:
            train = theano.function([qembd, qtype], loss, updates=updates, allow_input_downcast=True)
            qtype_predict = theano.function([qembd], qtype_test_pred, allow_input_downcast=True)
        print "Compile time(mins)", self.timer.print_checkpoint('compile')
        print "Done Compiling qtype model..."
        return train, qtype_predict
def loss_function(net, prediction, targets):

    # We use L2 Norm for regularization
    l2_reg = regularization.regularize_layer_params(
        net, regularization.l2) * cfg.L2_WEIGHT

    # Calculate the loss
    loss = calc_loss(prediction, targets) + l2_reg

    return loss
Example #25
def get_loss(prediction,in_var,target_var,all_layers,l1_reg=True):    
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()
    l_hid=all_layers["out"]
    reg_param=0.001
    if(l1_reg):
        l1_penalty = regularize_layer_params(l_hid, l1) * reg_param
        return loss + l1_penalty
    else:
        return loss
Example #26
def weight_decay_objective(layers,
                        loss_function,
                        target,
                        penalty_conv=1e-8,
                        penalty_conv_type = l2,
                        penalty_output=1e-8,
                        penalty_output_type = l2,
                        aggregate=aggregate,
                        deterministic=False,
                        get_output_kw={}):
    '''
    Defines weight decay (L2 by default) on the convolutional (layers[1])
    and output (layers[-1]) weights.
    '''
    net_out = get_output(layers[-1], deterministic=deterministic,
                        **get_output_kw)
    loss = loss_function(net_out, target)
    p1 = penalty_conv * regularize_layer_params(layers[1], penalty_conv_type)
    p2 = penalty_output * regularize_layer_params(layers[-1], penalty_output_type)
    losses = loss + p1 + p2
    return aggregate(losses)
Example #27
    def build_nn(cls, n_model, n_units=100):
        ### current params + response + grads
        in_l = layers.InputLayer(shape=(
            None,
            2 * n_model + 1,
        ),
                                 name='input_params')

        dense1 = layers.DenseLayer(in_l,
                                   num_units=n_units,
                                   nonlinearity=nonlinearities.tanh)

        out_l = layers.DenseLayer(dense1,
                                  num_units=n_model,
                                  nonlinearity=nonlinearities.linear)

        reg = \
          regularization.regularize_layer_params(dense1, regularization.l2) + \
          regularization.regularize_layer_params(out_l, regularization.l2)

        return in_l, out_l, reg
    def build_mlp(self, input_var=None, dropout_rate=0.5, l2_reg=0., l1_reg=0.):

        l_in = lasagne.layers.InputLayer(shape=(None, 1, 28, 28),
                                         input_var=input_var)

        l_in_drop = lasagne.layers.DropoutLayer(l_in, p=dropout_rate)

        self.l2_penalty = regularize_layer_params(l_in_drop, l2)
        self.l1_penalty = regularize_layer_params(l_in_drop, l1)

        l_out = lasagne.layers.DenseLayer(
                l_in_drop, num_units=10,
                nonlinearity=lasagne.nonlinearities.softmax)

        return l_out
Example #29
def compile_conv_ae(hyper_params,preproc):
    l_hid,l_out,in_var=build_conv_ae(hyper_params)
    params = lasagne.layers.get_all_params(l_out, trainable=True)
    target_var = T.ivector('targets')
    reconstruction = lasagne.layers.get_output(l_out)
    reduction=lasagne.layers.get_output(l_hid)
    loss = lasagne.objectives.squared_error(reconstruction, in_var).mean()
    l1_penalty = regularize_layer_params(l_hid, l1) * 0.0001
    loss+=l1_penalty  
    updates=lasagne.updates.nesterov_momentum(loss, params, learning_rate=0.001, momentum=0.8) 
    return ConvAutoencoder(hyper_params,l_out,preproc,in_var,
                         reduction,reconstruction,loss,updates)    
Example #30
def objective(
    output_layer,
    regularize_layers,
    target,
    loss_function=squared_error,
    aggregate=aggregate,
    deterministic=False,
    l1=0,
    l2=0,
    tv=0,
):
    network_output = layers.get_output(output_layer, deterministic=deterministic)
    loss = aggregate(loss_function(network_output, target))
    for layer in regularize_layers:
        if l1:
            loss += regularization.regularize_layer_params(layer, regularization.l1) * l1
        if l2:
            loss += regularization.regularize_layer_params(layer, regularization.l2) * l2
    if tv:
        loss += T.mean(T.abs_(network_output[:, 1:] - network_output[:, :-1])) * tv
    return loss
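
A related helper worth knowing about: Lasagne's regularize_layer_params_weighted takes a {layer: coefficient} mapping, which is convenient when different layers need different coefficients; a sketch with placeholder layers and coefficients chosen only for illustration:

from lasagne.layers import InputLayer, DenseLayer
from lasagne.regularization import regularize_layer_params_weighted, l2

l_in = InputLayer((None, 8))
l_hidden = DenseLayer(l_in, num_units=16)
l_output = DenseLayer(l_hidden, num_units=2)

# 1e-4 on the hidden weights, 1e-3 on the output weights, summed into one scalar
penalty = regularize_layer_params_weighted({l_hidden: 1e-4, l_output: 1e-3}, l2)
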
Example #31
def objective_with_L2(layers,
                      loss_function,
                      target,
                      aggregate=aggregate,
                      deterministic=False,
                      get_output_kw=None):
    reg = regularize_layer_params([layers["hidden5"]], l2)
    loss = objective(layers, loss_function, target, aggregate, deterministic, get_output_kw)
    
    if deterministic is False:
        return loss + reg * lambda_regularization
    else:
        return loss
Example #32
def weight_decay_objective(layers,
                           loss_function,
                           target,
                           penalty_conv=1e-8,
                           penalty_conv_type=l2,
                           penalty_output=1e-8,
                           penalty_output_type=l2,
                           aggregate=aggregate,
                           deterministic=False,
                           get_output_kw={}):
    '''
    Defines weight decay (L2 by default) on the convolutional (layers[1])
    and output (layers[-1]) weights.
    '''
    net_out = get_output(layers[-1],
                         deterministic=deterministic,
                         **get_output_kw)
    loss = loss_function(net_out, target)
    p1 = penalty_conv * regularize_layer_params(layers[1], penalty_conv_type)
    p2 = penalty_output * regularize_layer_params(layers[-1],
                                                  penalty_output_type)
    losses = loss + p1 + p2
    return aggregate(losses)
Example #33
def objective(layers,
              loss_function,
              target,
              aggregate=aggregate,
              deterministic=False,
              l1=0,
              l2=0,
              get_output_kw=None):
    """
    Default implementation of the NeuralNet objective.

    :param layers: The underlying layers of the NeuralNetwork
    :param loss_function: The callable loss function to use
    :param target: the expected output

    :param aggregate: the aggregation function to use
    :param deterministic: Whether or not to get a deterministic output
    :param l1: Optional l1 regularization parameter
    :param l2: Optional l2 regularization parameter
    :param get_output_kw: optional kwargs to pass to
                          :meth:`NeuralNetwork.get_output`
    :return: The total calculated loss
    """
    if get_output_kw is None:
        get_output_kw = {}
    output_layer = layers[-1]
    network_output = get_output(output_layer,
                                deterministic=deterministic,
                                **get_output_kw)
    loss = aggregate(loss_function(network_output, target))

    if l1:
        loss += regularization.regularize_layer_params(layers.values(),
                                                       regularization.l1) * l1
    if l2:
        loss += regularization.regularize_layer_params(layers.values(),
                                                       regularization.l2) * l2
    return loss
Example #34
    def _init_train_fn(self):
        """
        Initialize Theano function to compute loss and update weights using Adam for a single epoch and minibatch.
        """
        input_var = tensor5('input')
        output_var = T.lvector('output')
        one_hot = T.extra_ops.to_one_hot(output_var, self.num_classes, dtype='int64')

        # output_one_hot = T.extra_ops.to_one_hot(output_var, self.num_classes, dtype='int64')
        # Compute losses by iterating over the input variable (a 5D tensor where each "row" represents a clip that
        # has some number of frames.
        [losses, predictions], updates = theano.scan(fn=lambda X_clip, output: self.model.clip_loss(X_clip, output),
                                                     outputs_info=None,
                                                     sequences=[input_var, one_hot])

        loss = losses.mean()

        output_layer = self.model.layer('fc8')
        l2_penalty = regularization.regularize_layer_params(output_layer, regularization.l2) * self.reg * 0.5
        for layer_key in self.tuning_layers:
            layer = self.model.layer(layer_key)
            l2_penalty += regularization.regularize_layer_params(layer, regularization.l2) * self.reg * 0.5
        loss += l2_penalty

        # Get params for output layer and update using Adam
        params = output_layer.get_params(trainable=True)
        adam_update = lasagne.updates.adam(loss, params, learning_rate=self.output_lr)

        # Combine update expressions returned by theano.scan() with update expressions returned from the adam update
        updates.update(adam_update)
        for layer_key in self.tuning_layers:
            layer = self.model.layer(layer_key)
            layer_params = layer.get_params(trainable=True)
            layer_adam_updates = lasagne.updates.adam(loss, layer_params, learning_rate=self.tuning_lr)
            updates.update(layer_adam_updates)
        self.train_function = theano.function([input_var, output_var], [loss, predictions], updates=updates)
def build_discriminator_lstm(params, gate_params, cell_params):
    from lasagne.layers import InputLayer, DenseLayer, concat
    from lasagne.layers.recurrent import LSTMLayer
    from lasagne.regularization import l2, regularize_layer_params
    # from layers import MinibatchLayer
    # input layers
    l_in = InputLayer(
        shape=params['input_shape'], name='d_in')
    l_mask = InputLayer(
        shape=params['mask_shape'], name='d_mask')

    # recurrent layers for bidirectional network
    l_forward = LSTMLayer(
        l_in, params['n_units'], grad_clipping=params['grad_clip'],
        ingate=gate_params, forgetgate=gate_params,
        cell=cell_params, outgate=gate_params,
        nonlinearity=params['non_linearities'][0], only_return_final=True,
        mask_input=l_mask)
    l_backward = LSTMLayer(
        l_in, params['n_units'], grad_clipping=params['grad_clip'],
        ingate=gate_params, forgetgate=gate_params,
        cell=cell_params, outgate=gate_params,
        nonlinearity=params['non_linearities'][1], only_return_final=True,
        mask_input=l_mask, backwards=True)

    # concatenate output of forward and backward layers
    l_concat = concat([l_forward, l_backward], axis=1)

    # minibatch layer on forward and backward layers
    # l_minibatch = MinibatchLayer(l_concat, num_kernels=100)

    # output layer
    l_out = DenseLayer(
        l_concat, num_units=params['n_output_units'],
        nonlinearity=params['non_linearities'][2])

    regularization = regularize_layer_params(
        l_out, l2) * params['regularization']

    class Discriminator:
        def __init__(self, l_in, l_mask, l_out):
            self.l_in = l_in
            self.l_mask = l_mask
            self.l_out = l_out
            self.regularization = regularization

    return Discriminator(l_in, l_mask, l_out)
Example #36
 def objective(layers, loss_function, target, aggregate=aggregate,
               deterministic=False, get_output_kw=None):
     if get_output_kw is None:
         get_output_kw = {}
     output_layer = layers[-1]
     first_layer = layers[1]
     network_output = lasagne.layers.get_output(
         output_layer, deterministic=deterministic, **get_output_kw)
     if not deterministic:
         losses = loss_function(network_output, target) \
                 + l2 * regularization.regularize_network_params(
                     output_layer, regularization.l2) \
                 + l1 * regularization.regularize_layer_params(
                     first_layer, regularization.l1)
     else:
         losses = loss_function(network_output, target)
     return aggregate(losses)
Example #37
def define_updates(network, inputs, targets):
    prediction = lasagne.layers.get_output(network)

    loss = lasagne.objectives.categorical_crossentropy(T.clip(prediction, 0.00001, 0.99999), targets)
    loss = loss.mean()

    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(T.clip(test_prediction, 0.00001, 0.99999), targets)
    test_loss = test_loss.mean()


    l2_loss = regularize_layer_params(network, l2) * params.L2_LAMBDA
    loss = loss + l2_loss
    test_loss = test_loss + l2_loss


    acc = T.mean(T.eq(T.argmax(prediction, axis=1), targets),
                dtype=theano.config.floatX)
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), targets),
                dtype=theano.config.floatX)


    l_r = theano.shared(np.array(params.LEARNING_RATE, dtype=theano.config.floatX))

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use Stochastic Gradient
    # Descent (SGD), but Lasagne offers plenty more.
    network_params = lasagne.layers.get_all_params(network, trainable=True)
    if params.OPTIMIZATION == "MOMENTUM":
        updates = lasagne.updates.momentum(loss, network_params, learning_rate=l_r, momentum=params.MOMENTUM)
    elif params.OPTIMIZATION == "ADAM":
        updates = lasagne.updates.adam(loss, network_params, learning_rate=l_r)
    elif params.OPTIMIZATION == "RMSPROP":
        updates = lasagne.updates.rmsprop(loss, network_params, learning_rate=l_r)

    prediction_binary = T.argmax(prediction, axis=1)
    test_prediction_binary = T.argmax(test_prediction, axis=1)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([inputs, targets], [loss, l2_loss, acc, prediction_binary], updates=updates)
    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([inputs, targets], [test_loss, l2_loss, test_acc, test_prediction_binary])

    return train_fn, val_fn, l_r
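
Because l_r is returned as a Theano shared variable, the learning rate can be annealed between epochs without recompiling the training function; a minimal sketch of that pattern (the 0.95 decay factor is illustrative, not taken from the original code):

import numpy as np
import theano

l_r = theano.shared(np.array(0.01, dtype=theano.config.floatX))

for epoch in range(10):
    # ... run the training batches with the compiled train_fn here ...
    l_r.set_value(np.cast[theano.config.floatX](l_r.get_value() * 0.95))
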
Example #38
    def __init__(self, *args, **kwargs):
        super(TrainerMixin, self).__init__(*args, **kwargs)
        input_var = tensor.tensor4('inputs')
        target_var = tensor.ivector('targets')

        loss, _ = loss_acc(self.model,
                           input_var,
                           target_var,
                           deterministic=False)
        layers = get_all_layers(self.model)
        decay = regularize_layer_params(layers, l2) * 0.0001
        loss = loss + decay

        params = get_all_params(self.model, trainable=True)
        updates = momentum(loss,
                           params,
                           momentum=0.9,
                           learning_rate=self.learning_rate)
        self.set_training(input_var, target_var, loss, updates)
def nll_l2(predictions,
           targets,
           net,
           batch_size,
           num_samples,
           rw=None,
           train_clip=False,
           thresh=3,
           weight_decay=0.00001,
           **kwargs):
    if rw is None:
        rw = theano.shared(np.cast[theano.config.floatX](0))

    print('Weight decay:', weight_decay)

    loss = categorical_crossentropy(predictions, targets).mean()
    loss += rg.regularize_layer_params(ll.get_all_layers(net),
                                       rg.l2) * weight_decay

    return loss, rw
Example #40
def compile_train_function(neural_network, lr, w_dacy):
    input_var = neural_network['input'].input_var
    output_var = T.lvector()  # symbolic target variable

    predicted = lasagne.layers.get_output(neural_network['out'], inputs=input_var)  # network output

    loss = lasagne.objectives.categorical_crossentropy(predicted, output_var)  # cross-entropy loss
    loss = loss.mean()
    """
    Regularize L2 (avoid over-fitting)
    Only to function of train
    
    Lreg = L + λ*∑(w^2)
    where:  L --> loss
            λ --> weight decay
            w --> weight
    """
    loss += w_dacy * regularize_layer_params(neural_network['out'], l2)  # Regularize L2

    # Accuracy rate
    y_pred = T.argmax(predicted, axis=1)
    acc = T.eq(y_pred, output_var)
    acc = acc.mean()

    valid_predicted = lasagne.layers.get_output(neural_network['out'], inputs=input_var)  # validation network output
    valid_loss = lasagne.objectives.categorical_crossentropy(valid_predicted, output_var)  # validation loss
    valid_loss = valid_loss.mean()

    # Validation accuracy rate
    valid_y_pred = T.argmax(valid_predicted, axis=1)
    valid_acc = T.eq(valid_y_pred, output_var)
    valid_acc = valid_acc.mean()

    # Parameter updates
    params = lasagne.layers.get_all_params(neural_network['out'])
    updates = lasagne.updates.sgd(loss, params, lr)

    # Compile function
    train_fn = theano.function([input_var, output_var], [loss, acc], updates=updates)
    valid_fn = theano.function([input_var, output_var], [valid_loss, valid_acc])
    return train_fn, valid_fn
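
A tiny numeric illustration of the docstring formula Lreg = L + λ*∑(w^2), using made-up values and plain numpy:

import numpy as np

loss = 0.7                   # L: the plain cross-entropy loss
weight_decay = 1e-4          # λ: the weight-decay coefficient
w = np.array([[0.5, -1.0],
              [2.0,  0.0]])  # w: a small weight matrix

l_reg = loss + weight_decay * np.sum(w ** 2)
print(l_reg)                 # 0.7 + 1e-4 * 5.25 = 0.700525
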
Example #41
def build_mlp(size_x, lstm_size, input_var=None):

    lstm_nonlinearity = lasagne.nonlinearities.sigmoid
    gate_parameters = lasagne.layers.recurrent.Gate(
            W_in=lasagne.init.Orthogonal(),
            W_hid=lasagne.init.Orthogonal(),
            b=lasagne.init.Constant(0.))
    cell_parameters = lasagne.layers.recurrent.Gate(
            W_in=lasagne.init.Orthogonal(),
            W_hid=lasagne.init.Orthogonal(),
            # Setting W_cell to None denotes that no
            # cell connection will be used.
            W_cell=None,
            b=lasagne.init.Constant(0.),
            # By convention, the cell nonlinearity is tanh in an LSTM.
            nonlinearity=lasagne.nonlinearities.tanh)

    l_in = InputLayer((None, None, size_x),
                      input_var=input_var)
    batch_size, seqlen, _ = l_in.input_var.shape
    l_lstm = LSTMLayer(l_in, lstm_size, learn_init=True,
                       nonlinearity=lstm_nonlinearity,
                       ingate=gate_parameters,
                       forgetgate=gate_parameters,
                       cell=cell_parameters,
                       outgate=gate_parameters,
                       grad_clipping=100.)
    l2_penalty = regularize_layer_params(l_lstm, l2)
    l_reshape = lasagne.layers.ReshapeLayer(l_lstm, (-1, lstm_size))
    # Now, we can apply feed-forward layers as usual.
    l_dense = lasagne.layers.DenseLayer(
     l_reshape, num_units=1, nonlinearity=None)
    # Now, the shape will be n_batch*n_timesteps, 1. We can then reshape to
    # batch_size, seqlen to get a single value
    # for each timstep from each sequence
    l_out = lasagne.layers.ReshapeLayer(l_dense, (batch_size, seqlen, size_x))
    # l1_penalty = regularize_layer_params(l_out, l2)
    return l_out, l2_penalty  # , l1_penalty
Example #42
    def default_training(self):
        """Set the training (updates) for this trainer."""
        input_var = tensor.tensor4('inputs')
        target_var = tensor.ivector('targets')
        errors = OrderedDict()

        loss, acc = loss_acc(self.model,
                             input_var,
                             target_var,
                             deterministic=False)
        errors['train_acc'] = acc
        errors['classification error'] = loss
        layers = get_all_layers(self.model)
        decay = regularize_layer_params(layers, l2) * self.weight_decay
        errors['weight decay'] = decay

        loss = loss + decay

        params = get_all_params(self.model, trainable=True)
        updates = self.momentum_method(loss,
                                       params,
                                       momentum=self.momentum,
                                       learning_rate=self.learning_rate)
        self.set_training(input_var, target_var, loss, updates, values=errors)
Example #43
    def _get_loss_updates(self,
                          L1_reg=0.0,
                          L2_reg=0.001,
                          update_fn=lasagne.updates.nesterov_momentum,
                          max_norm=None,
                          deterministic=False,
                          momentum=0.9,
                          **kwargs):

        loss = (self._negative_log_likelihood(self.network_1, self.E,
                                              deterministic) +
                self._negative_log_likelihood(self.network_2, self.E,
                                              deterministic) +
                regularize_layer_params(self.network_1, l1) * L1_reg +
                regularize_layer_params(self.network_1, l2) * L2_reg +
                regularize_layer_params(self.network_2, l1) * L1_reg +
                regularize_layer_params(self.network_2, l2) * L2_reg +
                (regularize_layer_params(self.network_1, l2) -
                 regularize_layer_params(self.network_2, l2)) * L2_reg)

        if max_norm:
            grads = T.grad(loss, self.params)
            scaled_grads = lasagne.updates.total_norm_constraint(
                grads, max_norm)
            updates = update_fn(scaled_grads, self.params, **kwargs)
        else:
            updates = update_fn(loss, self.params, **kwargs)

        if momentum:
            updates = lasagne.updates.apply_nesterov_momentum(
                updates, self.params, self.learning_rate, momentum=momentum)

        # If the model was loaded from file, reload params
        if self.restored_update_params:
            for p, value in zip(updates.keys(), self.restored_update_params):
                p.set_value(value)
            self.restored_update_params = None

        # Store last update function to be later saved
        self.updates = updates

        return loss, updates
Example #44
def calc_loss_multi(prediction, targets):

    prediction = T.clip(prediction, 0.0000001, 0.9999999)

    #binary crossentropy is the best choice for a multi-class sigmoid output
    loss = T.mean(objectives.binary_crossentropy(prediction, targets))

    return loss


#theano variable for the class targets
targets = T.matrix('targets', dtype=theano.config.floatX)

#get the network output
prediction = l.get_output(NET)

#we use L2 Norm for regularization
l2_reg = regularization.regularize_layer_params(NET,
                                                regularization.l2) * L2_WEIGHT

#calculate the loss
if MULTI_LABEL:
    loss = calc_loss_multi(prediction, targets) + l2_reg
else:
    loss = calc_loss(prediction, targets) + l2_reg


################# ACCURACY FUNCTION #####################
def calc_accuracy(prediction, targets):

    #we can use the lasagne objective categorical_accuracy to determine the top1 single label accuracy
    a = T.mean(objectives.categorical_accuracy(prediction, targets, top_k=1))

    return a
Example #45
def main(model='cnn', num_epochs=5):
    # Load the dataset
    print "Loading data..."
    X_train, y_train, X_val, y_val, X_test, y_test = load_data_set()

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs', dtype=theano.config.floatX)
    target_var = T.ivector('targets')

    # Create neural network model (depending on first command line parameter)
    print "Building model and compiling functions..."

    if model == 'cnn':
        network = build_cnn(input_var)
    else:
        print "Unrecognized model type %r.", model
        return

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    # loss = lasagne.objectives.binary_crossentropy(prediction,target_var)
    loss = loss.mean()
    l2_penalty = regularize_layer_params(network, l2) * 1e-1
    loss += l2_penalty
    acc = T.mean(T.eq(T.argmax(prediction, axis=1), target_var),
                 dtype=theano.config.floatX)

    # We could add some weight decay as well here, see lasagne.regularization.

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use Stochastic Gradient
    # Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more.
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(loss,
                                                params,
                                                learning_rate=0.001,
                                                momentum=0.9)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(
        test_prediction, target_var)
    # test_loss = lasagne.objectives.binary_crossentropy(test_prediction,target_var)

    test_loss = test_loss.mean()
    # As a bonus, also create an expression for the classification accuracy:
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input_var, target_var], [loss, acc],
                               updates=updates,
                               allow_input_downcast=True)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc],
                             allow_input_downcast=True)

    test_pre = theano.function([input_var, target_var], [prediction],
                               on_unused_input='ignore')

    # Finally, launch the training loop.
    print("Starting training...")
    # We iterate over epochs:
    train_out = open("result/train_loss.txt", 'w')
    val_out = open("result/val_loss.txt", 'w')
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_acc = 0
        train_batches = 0
        start_time = time.time()
        for batch in iterate_minibatches(X_train, y_train, 500, shuffle=True):
            inputs, targets = batch
            # print type(targets), targets
            err, acc = train_fn(inputs, targets)
            train_err += err
            train_acc += acc
            # train_err += train_fn(inputs, targets)
            train_batches += 1
        train_out.write(str(train_err) + "\r\n")

        # And a full pass over the validation data:
        val_err = 0
        val_acc = 0
        val_batches = 0
        for batch in iterate_minibatches(X_val, y_val, 500, shuffle=False):
            inputs, targets = batch
            err, acc = val_fn(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1
        val_out.write(str(val_err) + "\r\n")

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs,
                                                   time.time() - start_time))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
        print("  training accuracy:\t\t{:.2f} %".format(train_acc /
                                                        train_batches * 100))
        print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
        print("  validation accuracy:\t\t{:.2f} %".format(val_acc /
                                                          val_batches * 100))

    # After training, we compute and print the test error:
    train_out.close()
    val_out.close()
    test_err = 0
    test_acc = 0
    test_batches = 0
    for batch in iterate_minibatches(X_test, y_test, 5, shuffle=False):
        inputs, targets = batch
        err, acc = val_fn(inputs, targets)
        test_err += err
        test_acc += acc
        test_batches += 1
        pre = test_pre(inputs, targets)
        print "预测概率:", pre
    print("Final results:")
    print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches))
    print("  test accuracy:\t\t{:.2f} %".format(test_acc / test_batches * 100))
Example #46
0
def main(num_epochs=200):
    # Load the dataset
    print("Loading data...")
    datasets = load_data()
    X_train, y_train = datasets[0]
    X_test, y_test = datasets[1]
    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    learnrate=0.02
    # Create neural network model (depending on first command line parameter)
    print("Building model and compiling functions...")

    network = build_cnn(input_var)
    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    l2_penalty = regularize_layer_params(network, l2)
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()+0.1*l2_penalty
    # We could add some weight decay as well here, see lasagne.regularization.

    params = lasagne.layers.get_all_params(network, trainable=True)

    #optimizer:
    #updates = lasagne.updates.adadelta(loss, params,learning_rate=learnrate)
    updates = lasagne.updates.nesterov_momentum(
        loss, params, learning_rate=learnrate, momentum=0.9)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                            target_var)
    test_loss = test_loss.mean()
    # As a bonus, also create an expression for the classification accuracy:
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input_var, target_var], loss, updates=updates)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

    best_acc = 0
    # Finally, launch the training loop.
    print("Starting training...")
    # We iterate over epochs:
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        if epoch % 8 == 7:
            learnrate*= 0.96
            #updates = lasagne.updates.adadelta(loss, params,learning_rate=learnrate)
            updates = lasagne.updates.nesterov_momentum(
                loss, params, learning_rate=learnrate, momentum=0.9)
            train_fn = theano.function([input_var, target_var], loss, updates=updates)

        for batch in iterate_minibatches(X_train, y_train, batch_size, shuffle=False):
            inputs, targets = batch
            train_err += train_fn(inputs, targets)
            train_batches += 1

        # And a full pass over the validation data:
        test_err = 0
        test_acc = 0
        test_batches = 0
        for batch in iterate_minibatches(X_test, y_test,batch_size, shuffle=False):
            inputs, targets = batch
            err, acc = val_fn(inputs, targets)
            test_err += err
            test_acc += acc
            test_batches += 1
        test_err = test_err / test_batches
        test_acc = test_acc / test_batches
        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
        print("  test loss:\t\t{:.6f}".format(test_err))
        print("  validation accuracy:\t\t{:.2f} %".format(
            test_acc * 100))

        if test_acc > best_acc:
            best_acc = test_acc
            np.savez('model10.npz', *lasagne.layers.get_all_param_values(network))
    print("final accuracy is:\t\t{:.6f}".format(best_acc * 100))
    print('*****************************************************\n'*2)
    return best_acc
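# A sketch of restoring the parameters saved above with np.savez (this assumes
# the same build_cnn and numpy/theano/lasagne imports as the surrounding example;
# 'arr_%d' is the default key naming np.savez uses for positional arguments):
def load_best_model():
    input_var = T.tensor4('inputs')
    network = build_cnn(input_var)
    with np.load('model10.npz') as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(network, param_values)
    return network, input_var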
def main(num_epochs=100):
    # Load the dataset
    print("Loading data...")
    datasets = load_data()
    X_train, y_train = datasets[0], datasets[1]
    X_val, y_val = datasets[2], datasets[3]
    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    learnrate=0.005
    # Create neural network model (depending on first command line parameter)
    print("Building model and compiling functions...")

    network = build_cnn(input_var)
    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    l2_penalty = regularize_layer_params(network, l2)
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()+0.01*l2_penalty
    # We could add some weight decay as well here, see lasagne.regularization.

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use Stochastic Gradient
    # Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more.
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(
            loss, params, learning_rate=learnrate, momentum=0.9)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                            target_var)
    test_loss = test_loss.mean()
    # As a bonus, also create an expression for the classification accuracy:
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input_var, target_var], loss, updates=updates)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

    best_val_loss=10
    improvement_threshold=0.999
    best_acc=0
    # Finally, launch the training loop.
    print("Starting training...")
    # We iterate over epochs:
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        if epoch % 8 == 7:
            learnrate *= 0.96
            updates = lasagne.updates.nesterov_momentum(
                loss, params, learning_rate=learnrate, momentum=0.9)
            train_fn = theano.function([input_var, target_var], loss, updates=updates)
        for batch in iterate_minibatches(X_train, y_train,BATCHSIZE, shuffle=False):
            inputs, targets = batch
            train_err += train_fn(inputs, targets)
            train_batches += 1

        # And a full pass over the validation data:
        val_err = 0
        val_acc = 0
        val_batches = 0
        for batch in iterate_minibatches(X_val, y_val, BATCHSIZE, shuffle=False):
            inputs, targets = batch
            err, acc = val_fn(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
        print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
        print("  validation accuracy:\t\t{:.2f} %".format(
            val_acc / val_batches * 100))
        if val_err/val_batches < best_val_loss*improvement_threshold:
            np.savez('best_model_omit5_v2.npz', *lasagne.layers.get_all_param_values(network))
            best_val_loss=val_err/val_batches
            print("                    best validation loss\t\t{:.6f}".format(best_val_loss))

        if val_acc / val_batches>best_acc:
            best_acc=val_acc / val_batches
            np.savez('best_classification_model_omit5_v2.npz', *lasagne.layers.get_all_param_values(network))
            print('                    saved best classification  model')
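Recompiling train_fn every time the learning rate is decayed (as both functions above do) is expensive. A sketch of an alternative, assuming the same loss, params, input_var and target_var as above: keep the rate in a Theano shared variable, compile once, and only call set_value when decaying (Example #50 below already creates such a shared variable as sh_lr):

sh_lr = theano.shared(lasagne.utils.floatX(0.005))
updates = lasagne.updates.nesterov_momentum(
    loss, params, learning_rate=sh_lr, momentum=0.9)
train_fn = theano.function([input_var, target_var], loss, updates=updates)

for epoch in range(num_epochs):
    if epoch % 8 == 7:
        # decay without rebuilding updates or recompiling train_fn
        sh_lr.set_value(lasagne.utils.floatX(sh_lr.get_value() * 0.96))
    # ... run the minibatch loop with train_fn as above ...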
Example #48
0
    split = data.split_data(labels, args.seeds)
    maxf = get_maxf(features)

    trainx, trainy = constuct_dataset(features, labels, label_set, split[0], maxf)
    testx, testy = constuct_dataset(features, labels, label_set, split[1], maxf)
    allx, ally = constuct_dataset(features, labels, label_set, features.keys(), maxf)

    input_var = sparse.csr_matrix(name = 'x', dtype = 'float32')
    un_var = sparse.csr_matrix(name = 'ux', dtype = 'float32')
    target_var = T.imatrix('targets')
    ent_target = T.ivector('ent_targets')
    network, l_entropy = build_model(input_var, maxf + 1, trainy.shape[1], args.ent_reg > 0, un_var)

    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean() + regularize_layer_params(network, l2) * args.param_reg

    if args.ent_reg > 0.0:
        ent_pred = lasagne.layers.get_output(l_entropy)
        loss += lasagne.objectives.binary_crossentropy(ent_pred, ent_target).mean() * args.ent_reg

    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=args.learning_rate, momentum = 0.9)

    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                            target_var)

    test_loss = test_loss.mean()
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), T.argmax(target_var, axis = 1)),
                      dtype=theano.config.floatX)
    def create_nnet(input_dims, action_dims, observation_dims, value_dims, learning_rate, grad_clip=None, l1_weight=None, l2_weight=None,
                    num_hidden_units=20, num_hidden_action_units=None, num_hidden_observ_units=None, num_hidden_value_units=None,
                    batch_size=32, max_train_epochs=1, hidden_nonlinearity=nonlinearities.rectify,
                    output_nonlinearity=None, update_method=updates.sgd):
        commonlayers = []
        commonlayers.append(layers.InputLayer(shape=(None, input_dims)))
        commonlayers.append(DenseLayer(commonlayers[-1], num_hidden_units,
                                               nonlinearity=hidden_nonlinearity))
        if num_hidden_action_units is None:
            actionlayers = [DenseLayer(commonlayers[-1], action_dims,
                                               nonlinearity=output_nonlinearity)]
        else:
            actionlayers = [DenseLayer(commonlayers[-1], num_hidden_action_units,
                                               nonlinearity=output_nonlinearity)]
            actionlayers.append(DenseLayer(actionlayers[-1], action_dims,
                                                   nonlinearity=output_nonlinearity))
        if num_hidden_observ_units is None:
            observlayers = [DenseLayer(commonlayers[-1], observation_dims,
                                               nonlinearity=output_nonlinearity)]
        else:
            observlayers = [DenseLayer(commonlayers[-1], num_hidden_observ_units,
                                               nonlinearity=output_nonlinearity)]
            observlayers.append(DenseLayer(observlayers[-1], observation_dims, nonlinearity=output_nonlinearity))
        if num_hidden_value_units is None:
            dvaluelayers = [DenseLayer(commonlayers[-1], value_dims,
                                               nonlinearity=output_nonlinearity)]
        else:
            dvaluelayers = [DenseLayer(commonlayers[-1], num_hidden_value_units,
                                               nonlinearity=output_nonlinearity)]
            dvaluelayers.append(DenseLayer(dvaluelayers[-1], value_dims,
                                                   nonlinearity=output_nonlinearity))
        actvallayers = [layers.ConcatLayer([actionlayers[-1], dvaluelayers[-1]])]
        obsvallayers = [layers.ConcatLayer([observlayers[-1], dvaluelayers[-1]])]
        concatlayers = [layers.ConcatLayer([actionlayers[-1], observlayers[-1], dvaluelayers[-1]])]
        action_prediction = layers.get_output(actionlayers[-1])
        dvalue_prediction = layers.get_output(dvaluelayers[-1])
        actval_prediction = layers.get_output(actvallayers[-1])
        obsval_prediction = layers.get_output(obsvallayers[-1])
        concat_prediction = layers.get_output(concatlayers[-1])
        input_var = commonlayers[0].input_var
        action_target = T.matrix(name="action_target", dtype=floatX)
        dvalue_target = T.matrix(name="value_target", dtype=floatX)
        actval_target = T.matrix(name="actval_target", dtype=floatX)
        obsval_target = T.matrix(name="obsval_target", dtype=floatX)
        concat_target = T.matrix(name="concat_target", dtype=floatX)
        action_loss = objectives.squared_error(action_prediction, action_target).mean()
        obsval_loss = objectives.squared_error(obsval_prediction, obsval_target).mean()
        dvalue_loss = objectives.squared_error(dvalue_prediction, dvalue_target).mean()
        actval_loss = objectives.squared_error(actval_prediction, actval_target).mean()
        concat_loss = objectives.squared_error(concat_prediction, concat_target).mean()
        if l1_weight is not None:
            action_l1penalty = regularize_layer_params(commonlayers + actionlayers, l1) * l1_weight
            obsval_l1penalty = regularize_layer_params(commonlayers + observlayers + dvaluelayers, l1) * l1_weight
            dvalue_l1penalty = regularize_layer_params(commonlayers + dvaluelayers, l1) * l1_weight
            actval_l1penalty = regularize_layer_params(commonlayers + actionlayers + dvaluelayers, l1) * l1_weight
            concat_l1penalty = regularize_layer_params(commonlayers + actionlayers + observlayers + dvaluelayers, l1) * l1_weight
            action_loss += action_l1penalty
            obsval_loss += obsval_l1penalty
            dvalue_loss += dvalue_l1penalty
            actval_loss += actval_l1penalty
            concat_loss += concat_l1penalty
        if l2_weight is not None:
            action_l2penalty = regularize_layer_params(commonlayers + actionlayers, l2) * l2_weight
            obsval_l2penalty = regularize_layer_params(commonlayers + observlayers + dvaluelayers, l2) * l2_weight
            dvalue_l2penalty = regularize_layer_params(commonlayers + dvaluelayers, l2) * l2_weight
            actval_l2penalty = regularize_layer_params(commonlayers + actionlayers + dvaluelayers, l2) * l2_weight
            concat_l2penalty = regularize_layer_params(commonlayers + actionlayers + observlayers + dvaluelayers, l2) * l2_weight
            action_loss += action_l2penalty
            obsval_loss += obsval_l2penalty
            dvalue_loss += dvalue_l2penalty
            actval_loss += actval_l2penalty
            concat_loss += concat_l2penalty
        action_params = layers.get_all_params(actionlayers[-1], trainable=True)
        obsval_params = layers.get_all_params(obsvallayers[-1], trainable=True)
        dvalue_params = layers.get_all_params(dvaluelayers[-1], trainable=True)
        actval_params = layers.get_all_params(actvallayers[-1], trainable=True)
        concat_params = layers.get_all_params(concatlayers[-1], trainable=True)
        if grad_clip is not None:
            action_grads = theano.grad(action_loss, action_params)
            obsval_grads = theano.grad(obsval_loss, obsval_params)
            dvalue_grads = theano.grad(dvalue_loss, dvalue_params)
            actval_grads = theano.grad(actval_loss, actval_params)
            concat_grads = theano.grad(concat_loss, concat_params)
            action_grads = [updates.norm_constraint(grad, grad_clip, range(grad.ndim)) for grad in action_grads]
            obsval_grads = [updates.norm_constraint(grad, grad_clip, range(grad.ndim)) for grad in obsval_grads]
            dvalue_grads = [updates.norm_constraint(grad, grad_clip, range(grad.ndim)) for grad in dvalue_grads]
            actval_grads = [updates.norm_constraint(grad, grad_clip, range(grad.ndim)) for grad in actval_grads]
            concat_grads = [updates.norm_constraint(grad, grad_clip, range(grad.ndim)) for grad in concat_grads]
            action_updates = update_method(action_grads, action_params, learning_rate)
            obsval_updates = update_method(obsval_grads, obsval_params, learning_rate)
            dvalue_updates = update_method(dvalue_grads, dvalue_params, learning_rate)
            actval_updates = update_method(actval_grads, actval_params, learning_rate)
            concat_updates = update_method(concat_grads, concat_params, learning_rate)
        else:
            action_updates = update_method(action_loss, action_params, learning_rate)
            obsval_updates = update_method(obsval_loss, obsval_params, learning_rate)
            dvalue_updates = update_method(dvalue_loss, dvalue_params, learning_rate)
            actval_updates = update_method(actval_loss, actval_params, learning_rate)
            concat_updates = update_method(concat_loss, concat_params, learning_rate)

        fit_action = theano.function([input_var, action_target], action_loss, updates=action_updates)
        fit_obsval = theano.function([input_var, obsval_target], obsval_loss, updates=obsval_updates)
        fit_dvalue = theano.function([input_var, dvalue_target], dvalue_loss, updates=dvalue_updates)
        fit_actval = theano.function([input_var, actval_target], actval_loss, updates=actval_updates)
        fit_concat = theano.function([input_var, concat_target], concat_loss, updates=concat_updates)

        predict_action = theano.function([input_var], action_prediction)
        predict_obsval = theano.function([input_var], obsval_prediction)
        predict_dvalue = theano.function([input_var], dvalue_prediction)
        predict_actval = theano.function([input_var], actval_prediction)
        predict_concat = theano.function([input_var], concat_prediction)

        nnet = Mock(
            fit_action=fit_action,
            fit_obsval=fit_obsval,
            fit_value=fit_dvalue,
            fit_actval=fit_actval,
            fit_both=fit_concat,
            predict_action=predict_action,
            predict_obsval=predict_obsval,
            predict_value=predict_dvalue,
            predict_actval=predict_actval,
            predict_both=predict_concat,
        )
        return nnet
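A possible usage sketch for create_nnet (the dimensions and the random batch below are invented for illustration; it assumes create_nnet is reachable at module scope, along with a numpy import `np` and the floatX dtype string used above):

nnet = create_nnet(input_dims=8, action_dims=4, observation_dims=8, value_dims=1,
                   learning_rate=1e-3, l2_weight=1e-4)
states = np.random.rand(32, 8).astype(floatX)    # a fake batch of 32 observations
actions = np.random.rand(32, 4).astype(floatX)   # matching action targets
step_loss = nnet.fit_action(states, actions)     # one squared-error training step
action_preds = nnet.predict_action(states)       # array of shape (32, 4)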
Example #50
0
def main(num_epochs=500):
    # Load the dataset
    print("Loading data...")
    #X_train, y_train, X_val, y_val, X_test, y_test= pull_data()
    trainX, trainY, valX, valY, testX, testY = pull_data()
    
    trainX = normalize(trainX.reshape(trainX.shape[0],1, DIM, DIM))
    valX = normalize(valX.reshape(valX.shape[0],1, DIM, DIM))
    testX = normalize(testX.reshape(testX.shape[0],1, DIM, DIM))

    trainY = trainY - 1
    valY = valY - 1
    testY = testY - 1

    trainX, trainY = shuffle(trainX, trainY)
    valX, valY = shuffle(valX, valY)
    testX, testY = shuffle(testX, testY)

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    output_var = T.ivector('targets')

    model = build_cnn(input_var)
    print "[X] CNN defining its goals."
    
    model_params = lasagne.layers.get_all_params(model, trainable=True)
    sh_lr = theano.shared(lasagne.utils.floatX(LEARNING_RATE))

    #why do we want to compute output expressions for model and input_var???
    noisy_output = lasagne.layers.get_output(model, input_var, deterministic=False)
    true_output = lasagne.layers.get_output(model, input_var, deterministic=True)

    noisy_prediction = T.argmax(noisy_output, 1)
    true_prediction = T.argmax(true_output, 1)

    l2_loss = regularize_layer_params(model, l2)*L2_REG

    ## Loss expression
    noisy_cost = T.mean(T.nnet.categorical_crossentropy(noisy_output, output_var)) + l2_loss
    true_cost = T.mean(T.nnet.categorical_crossentropy(true_output, output_var)) + l2_loss

    ## error values
    noisy_error = 1.0 - T.mean(lasagne.objectives.categorical_accuracy(noisy_output, output_var))
    true_error = 1.0 - T.mean(lasagne.objectives.categorical_accuracy(true_output, output_var))

    ## stochastic gradient descent updates
    #updates = lasagne.updates.sgd(noisy_cost, model_params, learning_rate=sh_lr)
    ##stochastic gradient descent with Nesterov momentum

    updates = lasagne.updates.nesterov_momentum(
            noisy_cost, model_params, learning_rate=sh_lr, momentum=0.99)

    train = theano.function([input_var,output_var], [noisy_cost, noisy_error], 
        updates=updates, 
        allow_input_downcast=True)

    get_score = theano.function([input_var,output_var], [true_cost, true_error], 
        allow_input_downcast=True)

    best_validation_cost = np.inf
    best_iter = 0
    

    n_train_batches = int(np.ceil(trainX.shape[0] / float(BATCH_SIZE)))

    plot_iters = []

    plot_train_cost = []
    plot_train_error = []

    plot_valid_cost = []
    plot_valid_error = []

    plot_test_cost = []
    plot_test_error = []

    epoch = 0
    start_time = timeit.default_timer()
    print "[X] CNN begins its training."
    try: 
        while True:
            epoch = epoch + 1

            for minibatch_index in xrange(n_train_batches):
                iter = (epoch - 1) * n_train_batches + minibatch_index  
                if iter % 100 == 0:
                    print "[O] Training at iteration %d." % iter

                cost_ij = train(trainX[minibatch_index*BATCH_SIZE:np.minimum((minibatch_index+1)*BATCH_SIZE, trainX.shape[0])], 
                    trainY[minibatch_index*BATCH_SIZE:np.minimum((minibatch_index+1)*BATCH_SIZE, trainY.shape[0])])

                if (iter+1) % VALIDATION_FREQUENCY == 0:
                    train_cost, train_error = get_score(trainX, trainY)
                    valid_cost, valid_error = get_score(valX, valY)
                    test_cost, test_error = get_score(testX, testY)

                    plot_train_cost.append(train_cost)
                    plot_train_error.append(train_error)

                    plot_valid_cost.append(valid_cost)
                    plot_valid_error.append(valid_error)

                    plot_test_cost.append(test_cost)
                    plot_test_error.append(test_error)

                    plot_iters.append(iter)

                    ## plotting functions
                    if not os.path.exists(FIGURE_SAVE_DIR):
                        os.makedirs(FIGURE_SAVE_DIR)
                    plot_curves(plot_iters, plot_train_cost, plot_valid_cost, 'Training Cost', 'Validation Cost', 'train_val_cost.pdf')
                    plot_curves(plot_iters, plot_train_error, plot_valid_error, 'Training Error', 'Validation Error', 'train_val_error.pdf')
                    
                    #plot_cm(train_pred, trainY, 'Confusion Matrix on the Training Set', 'cm_train.pdf')
                    #plot_cm(valid_pred, valY, 'Confusion Matrix on the Validation Set', 'cm_valid.pdf')
                    #plot_cm(test_pred, testY, 'Confusion Matrix on the Test Set', 'cm_test.pdf')

                    print "--> Epoch %i, minibatch %i/%i has training true cost \t %f." % (epoch, minibatch_index+1, n_train_batches, train_cost)
                    print "--> Epoch %i, minibatch %i/%i has validation true cost \t %f and error of \t %f %%." % (epoch, minibatch_index+1, n_train_batches, valid_cost, valid_error)

                    if valid_cost < best_validation_cost:
                        print "----> New best score found!"
                        print "--> Test cost of %f and test error of %f." % (test_cost, test_error)
                        if not os.path.exists(PARAM_SAVE_DIR):
                            os.makedirs(PARAM_SAVE_DIR)
                        for f in glob.glob(PARAM_SAVE_DIR+'/*'):
                            os.remove(f)
                        all_param_values = lasagne.layers.get_all_param_values(model)
                        joblib.dump(all_param_values, os.path.join(PARAM_SAVE_DIR, 'params.pkl'))
                        print "----> Parameters saved."
                        best_validation_cost = valid_cost
                        best_iter = iter
    except KeyboardInterrupt:
        pass

    end_time = timeit.default_timer()

    print "--> Best validation score of %f." % best_validation_cost
    print "--> Total runtime %.2f minutes." % ((end_time-start_time) / 60.)
    print "[X] Saving the scores."

    joblib.dump(plot_iters, os.path.join(PARAM_SAVE_DIR, "iters.pkl"))

    joblib.dump(plot_train_cost, os.path.join(PARAM_SAVE_DIR, "train_cost.pkl"))
    joblib.dump(plot_train_error, os.path.join(PARAM_SAVE_DIR, "train_error.pkl"))

    joblib.dump(plot_valid_cost, os.path.join(PARAM_SAVE_DIR, "valid_cost.pkl"))
    joblib.dump(plot_valid_error, os.path.join(PARAM_SAVE_DIR, "valid_error.pkl"))

    joblib.dump(plot_test_cost, os.path.join(PARAM_SAVE_DIR, "test_cost.pkl"))
    joblib.dump(plot_test_error, os.path.join(PARAM_SAVE_DIR, "test_error.pkl"))
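In the example above get_score is applied to the full training, validation and test arrays in single calls, which can run out of GPU memory for larger datasets. A sketch of scoring in minibatches instead (assuming the same get_score, BATCH_SIZE and numpy import as above; the last partial batch is weighted like the full ones, so the result is a close approximation rather than an exact mean):

def score_in_batches(X, Y, batch_size=BATCH_SIZE):
    costs, errors = [], []
    for start in range(0, X.shape[0], batch_size):
        cost, error = get_score(X[start:start + batch_size], Y[start:start + batch_size])
        costs.append(cost)
        errors.append(error)
    return np.mean(costs), np.mean(errors)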
Example #51
0
def main(num_epochs=200):
    # Load the dataset
    print("Loading data...")
    datasets = load_data()
    X_train, y_train = datasets[0]
    X_test, y_test = datasets[1]
    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    learnrate = 0.01
    # Create neural network model (depending on first command line parameter)
    print("Building model and compiling functions...")

    network = build_cnn(input_var)
    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    l2_penalty = regularize_layer_params(network, l2)
    l1_penalty = regularize_layer_params(network, l1)
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean() + 5 * l2_penalty + l1_penalty
    # We could add some weight decay as well here, see lasagne.regularization.

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use Stochastic Gradient
    # Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more.
    params = lasagne.layers.get_all_params(network, trainable=True)

    #updates = lasagne.updates.adadelta(loss, params)
    updates = lasagne.updates.nesterov_momentum(loss,
                                                params,
                                                learning_rate=learnrate,
                                                momentum=0.9)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(
        test_prediction, target_var)
    test_loss = test_loss.mean()
    # As a bonus, also create an expression for the classification accuracy:
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input_var, target_var], loss, updates=updates)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

    best_acc = 0
    # Finally, launch the training loop.
    print("Starting training...")
    # We iterate over epochs:
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        if epoch % 8 == 7:
            learnrate *= 0.96
            #updates = lasagne.updates.adadelta(loss, params,learning_rate=learnrate)
            updates = lasagne.updates.nesterov_momentum(
                loss, params, learning_rate=learnrate, momentum=0.9)
            train_fn = theano.function([input_var, target_var],
                                       loss,
                                       updates=updates)

        for batch in iterate_minibatches(X_train,
                                         y_train,
                                         batch_size,
                                         shuffle=False):
            inputs, targets = batch
            train_err += train_fn(inputs, targets)
            train_batches += 1

        # And a full pass over the validation data:
        test_err = 0
        test_acc = 0
        test_batches = 0
        for batch in iterate_minibatches(X_test,
                                         y_test,
                                         batch_size,
                                         shuffle=False):
            inputs, targets = batch
            err, acc = val_fn(inputs, targets)
            test_err += err
            test_acc += acc
            test_batches += 1
        test_err = test_err / test_batches
        test_acc = test_acc / test_batches
        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs,
                                                   time.time() - start_time))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
        print("  test loss:\t\t{:.6f}".format(test_err))
        print("  validation accuracy:\t\t{:.2f} %".format(test_acc * 100))

        if test_acc > best_acc:
            best_acc = test_acc
    return best_acc
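Note that regularize_layer_params(network, l2) only penalizes the parameters of the layer(s) it is given, so if build_cnn returns just the output layer, the penalties in the examples above cover that layer alone. A sketch of penalizing every layer below the output instead, using lasagne.regularization.regularize_network_params (it assumes network, prediction and target_var as in the examples above; the 1e-4 coefficient is illustrative, not a value used above):

from lasagne.regularization import regularize_network_params, l2

l2_penalty = regularize_network_params(network, l2)
loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
loss = loss.mean() + 1e-4 * l2_penalty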
Example #52
0
def main(num_epochs=100, num_points=1200, compute_flag='cpu'):
    # Arguments passed as string need to be converted to int
    num_epochs = int(num_epochs)
    num_points = int(num_points)
    # Define name of output files
    results_file_name = 'exp_' + str(num_epochs) + '_' + str(
        num_points) + '_' + compute_flag + '.csv'
    network_file_name = 'network_' + str(num_epochs) + '_' + str(
        num_points) + '_' + compute_flag
    print 'Saving file to: %s' % results_file_name
    print 'Number of points: %d ' % num_points
    print 'Compute Flag: %s ' % compute_flag
    save_file(results_file_name)
    Deep_learner = DCNN_network.DCNN_network()
    # Define the input tensor
    input_var = T.tensor4('inputs')
    # Define the output tensor (in this case it is a real value or reflectivity)
    if compute_flag == 'gpu3_softmax':
        output_var = T.ivector('targets')
    else:
        output_var = T.fcol('targets')
    # User input to decide which experiment to run, cpu runs were performed
    # to check if the network was working correctly
    if compute_flag == 'cpu':
        network, l_hidden1 = Deep_learner.build_CNN(input_var)
    elif compute_flag == 'cpu2':
        network, l_hidden1 = Deep_learner.build_CNN_2(input_var)
    elif compute_flag == 'cpu3':
        network, l_hidden1 = Deep_learner.build_CNN_3(input_var)
    elif compute_flag == 'gpu2':
        print('gpu2 experiment')
        network, l_hidden1 = Deep_learner.build_DCNN_2(input_var)
    elif compute_flag == 'gpu3':
        print('gpu3 experiment')
        network, l_hidden1 = Deep_learner.build_DCNN_3(input_var)
    elif compute_flag == 'deep':
        network, l_hidden1 = Deep_learner.build_DCNN_deep(input_var)
    elif compute_flag == 'gpu3_softmax':
        network, l_hidden1 = Deep_learner.build_DCNN_3_softmax(input_var)
    else:
        network, l_hidden1 = Deep_learner.build_DCNN(input_var)

    train_prediction = lasagne.layers.get_output(network)
    test_prediction = lasagne.layers.get_output(network)
    if compute_flag == 'gpu3_softmax':
        loss = lasagne.objectives.categorical_crossentropy(
            train_prediction, output_var)
        loss = loss.mean()
    else:

        # Define the mean square error objective function
        loss = T.mean(
            lasagne.objectives.squared_error(train_prediction, output_var))

        test_loss = T.mean(
            lasagne.objectives.squared_error(test_prediction, output_var))
        # Add a l1 regulerization on the fully connected dense layer
        l1_penalty = regularize_layer_params(l_hidden1, l1)

        loss = loss + l1_penalty

        test_loss = test_loss + l1_penalty

    params = lasagne.layers.get_all_params(network, trainable=True)

    updates = lasagne.updates.nesterov_momentum(loss,
                                                params,
                                                learning_rate=0.0000001,
                                                momentum=0.9)

    train_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), output_var),
                       dtype=theano.config.floatX)
    # Define theano function which generates and compiles C code for the optimization problem
    train_fn = theano.function([input_var, output_var], [loss, train_acc],
                               updates=updates)

    #    test_fn = theano.function([input_var, output_var],test_loss, updates=updates)

    base_path = '/home/an67a/deep_nowcaster/data/dataset2/'
    training_set_list = os.listdir(base_path)
    training_set_list = filter(lambda x: x[-4:] == '.pkl' and 'val' not in x,
                               training_set_list)
    validation_set_list = os.listdir(base_path)
    validation_set_list = filter(lambda x: x[-4:] == '.pkl' and 'val' in x,
                                 validation_set_list)
    experiment_start_time = time.time()
    # Load Data Set
    DataSet = []
    print('Loading data set...')
    for file_name in training_set_list[:3]:
        print file_name
        temp_file = file(base_path + file_name, 'rb')
        X_train, Y_train = cPickle.load(temp_file)
        temp_file.close()
        Y_train = Y_train.reshape(-1, ).astype('uint8')
        DataSet.append((X_train, Y_train))

    print('Start training...')
    for epoch in range(num_epochs):
        print('Epoch number : %d ' % epoch)
        train_err = 0
        train_batches = 0
        train_acc = 0
        start_time = time.time()
        for data in DataSet:
            #        for file_name in training_set_list:
            #            print file_name
            #            temp_file = file(base_path + file_name,'rb')
            #            X_train,Y_train = cPickle.load(temp_file)
            #            Y_train = Y_train.astype('uint8')
            #            temp_file.close()
            for batch in iterate_minibatches(data[0],
                                             data[1],
                                             1059,
                                             shuffle=False):
                inputs, targets = batch
                err, acc = train_fn(inputs, targets)
                train_err += err
                train_acc += acc
                train_batches += 1
        print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs,
                                                   time.time() - start_time))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
        print("  validation accuracy:\t\t{:.2f} %".format(train_acc /
                                                          train_batches * 100))
        append_file(results_file_name, epoch + 1,
                    round(train_err / train_batches, 2),
                    round((train_acc / train_batches) * 100, 2))

        # Dump the network file every 100 epochs
        if (epoch + 1) % 100 == 0:
            print('creating network file')
            network_file = file(
                '/home/an67a/deep_nowcaster/output/' + network_file_name +
                '_' + str(epoch + 1) + '.pkl', 'wb')
            cPickle.dump(network,
                         network_file,
                         protocol=cPickle.HIGHEST_PROTOCOL)
            network_file.close()
    time_taken = round(time.time() - experiment_start_time, 2)
    print('The experiment took {:.3f}s'.format(time.time() -
                                               experiment_start_time))
    append_file(results_file_name, 'The experiment took', time_taken, 0)
def prepare_functions():

    from lasagne.regularization import regularize_layer_params_weighted, regularize_layer_params
    from lasagne.regularization import l1, l2
    """
    This prepares the theano/lasagne functions for use in the training functions
    """
    observations = T.matrix('observations')
    srng = RandomStreams(seed=42)
    predictions = T.vector('predictions')
    predictions_ct = theano.gradient.disconnected_grad_(predictions)
    discounted_reward = T.vector('actual')
    r = T.vector('random')

    # Set up random sampling used in some policies
    rv_u = srng.uniform(size=(1, ))
    r = theano.function([], rv_u)

    # Set up the network
    D_network = QNetwork(observations)
    q_values = lasagne.layers.get_output(D_network)
    probabilities = lasagne.nonlinearities.softmax(q_values)
    D_params = lasagne.layers.get_all_params(D_network, trainable=True)
    get_q_values = theano.function([observations], q_values)

    l1_penalty = 1e-4 * regularize_layer_params(
        lasagne.layers.get_all_layers(D_network), l1)

    # Policies:
    # Policy1: 'greedy_choice': Greedy
    # Policy2: ' weighted_choice': chooses actions based upon probabilities
    policyname = 'greedy'
    #    policyname='greedy'
    if policyname == 'greedy':
        actions = T.argmax(q_values, axis=1)
    elif policyname == 'weighted':
        actions = T.argmax(
            T.abs_(T.extra_ops.cumsum(probabilities, axis=1) - r()), axis=1)
    else:
        raise Exception

    policy_action = theano.function([observations], actions, name=policyname)
    prediction = q_values[:, actions].reshape((-1, ))
    get_prediction = theano.function([observations], prediction)

    D_obj = lasagne.objectives.squared_error(
        prediction, discounted_reward).mean(axis=0, keepdims=False)  # + l1_penalty

    D_updates = lasagne.updates.adam(D_obj, D_params, learning_rate=LEARN_RATE)
    D_train = theano.function([observations, discounted_reward],
                              D_obj,
                              updates=D_updates,
                              name='D_training')

    functions = {}
    functions['get_q_values'] = get_q_values
    functions['policy_action'] = policy_action
    functions['D_train'] = D_train
    functions['D_params'] = D_params
    functions['D_network'] = D_network
    functions['get_params'] = lasagne.layers.get_all_params(D_network)
    functions['get_all_param_values'] = lasagne.layers.get_all_param_values(
        D_network)
    return functions
Example #54
0
                              
l_hidden2 = lasagne.layers.DenseLayer(l_hid1_drop, num_units=600,
            nonlinearity=lasagne.nonlinearities.sigmoid)

l_out = lasagne.layers.DenseLayer(l_hidden2, num_units=10,
                                  nonlinearity=lasagne.nonlinearities.softmax)
                                  
# get the prediction of network
train_prediction = lasagne.layers.get_output(l_out)
#f = theano.function([X], prediction)
# Loss function for train
train_loss = lasagne.objectives.categorical_crossentropy(train_prediction, y)
train_loss = train_loss.mean()

# Regularization
layer1_reg = reg.regularize_layer_params(l_hidden1, reg.l1)*Lambda
layer2_reg = reg.regularize_layer_params(l_hidden2, reg.l1)*Lambda

train_loss = train_loss + layer1_reg + layer2_reg
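# A sketch of an equivalent way to build the two penalties above: lasagne.regularization
# also provides regularize_layer_params_weighted, which takes a {layer: coefficient}
# dict (this assumes `reg` is lasagne.regularization, as in the names used above).
weighted_penalty = reg.regularize_layer_params_weighted(
    {l_hidden1: Lambda, l_hidden2: Lambda}, reg.l1)
# train_loss = train_loss + weighted_penalty   # would replace layer1_reg + layer2_reg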

# train params and updates
params = lasagne.layers.get_all_params(l_out, trainable=True)
updates = lasagne.updates.nesterov_momentum(
            train_loss, params, learning_rate=0.01, momentum=0.9)

# train function
train_fn = theano.function([X, y], train_loss, updates=updates)

# ##############################################################
# Test side
# Test prediction
Example #55
0
all_out = [l_out]
all_out.extend(l_hids)

train_out = lasagne.layers.get_output(
    all_out, deterministic=False)
hids_out_train = train_out[1:]
train_out = train_out[0]

eval_out = lasagne.layers.get_output(
    all_out, deterministic=True)
hids_out_eval = eval_out[1:]
eval_out = eval_out[0]

cost_train = T.mean(networks.calc_cross_ent(train_out, sym_y, paras))
if paras["L2_reg"] > 0:
    cost_train += paras["L2_reg"] * regularize_layer_params(l_out, l2)
if paras["L1_reg"] > 0:
    cost_train += paras["L1_reg"] * regularize_layer_params(l_out, l1)
cost_eval = networks.calc_cross_ent(eval_out, sym_y, paras)

all_params = lasagne.layers.get_all_params(l_out, trainable=True)

updates, norm = networks.gradient_updates(cost_train, all_params, paras, sh_lr,
                                          update_function=eval(paras["optimizer"]))

print("compiling f_eval...")
fun_inp = [sym_x, sym_y]

if paras["rnn_type"] != "lstm":
    hids.pop(-2)
Example #56
0
def main():

    print("Building network ...")
    # Note: in Rocktaschel's paper a linear layer is first used to transform the word
    # vectors into vectors of size K_HIDDEN. I'm assuming that this is equivalent to updating W.
    # Input layer for premise
    input_var_type = T.TensorType('int32', [False] * 2)
    var_name = "input"
    input_var_prem = input_var_type(var_name)
    input_var_hypo = input_var_type(var_name)
    
    l_in_prem = lasagne.layers.InputLayer(shape=(None, MAX_LENGTH_PREM), input_var=input_var_prem)
    # Mask layer for premise
    l_mask_prem = lasagne.layers.InputLayer(shape=(None, MAX_LENGTH_PREM))
    # Input layer for hypothesis
    l_in_hypo = lasagne.layers.InputLayer(shape=(None, MAX_LENGTH_HYPO), input_var=input_var_hypo)
    # Mask layer for hypothesis
    l_mask_hypo = lasagne.layers.InputLayer(shape=(None, MAX_LENGTH_HYPO))
    # Word embedding layers
    l_in_prem_hypo = lasagne.layers.ConcatLayer([l_in_prem, l_in_hypo], axis=1)
    l_in_embedding = lasagne.layers.EmbeddingLayer(l_in_prem_hypo, 
        VOCAB_SIZE, WORD_VECTOR_SIZE, W=word_vector_init, name='EmbeddingLayer')
    # Adding this linear layer didn't increase the accuracy, so I comment it out
    # l_in_linear = lasagne.layers.EmbeddingChangeLayer(l_in_embedding, K_HIDDEN, nonlinearity=lasagne.nonlinearities.linear)
    l_in_embedding_dropout = lasagne.layers.DropoutLayer(l_in_embedding, p=DROPOUT_RATE, rescale=True)
    l_in_prem_embedding = lasagne.layers.SliceLayer(l_in_embedding_dropout, 
        slice(0, MAX_LENGTH_PREM), axis=1)
    l_in_hypo_embedding = lasagne.layers.SliceLayer(l_in_embedding_dropout,
        slice(MAX_LENGTH_PREM, MAX_LENGTH_PREM + MAX_LENGTH_HYPO), axis=1)
    # LSTM layer for premise
    l_lstm_prem = lasagne.layers.LSTMLayer_withCellOut(l_in_prem_embedding, K_HIDDEN, 
        peepholes=False, grad_clipping=GRAD_CLIP, 
        nonlinearity=lasagne.nonlinearities.tanh, 
        mask_input=l_mask_prem, only_return_final=False)
    # The slicelayer extracts the cell output of the premise sentence
    l_lstm_prem_out = lasagne.layers.SliceLayer(l_lstm_prem, -1, axis=1)
    # LSTM layer for hypothesis
    # LSTM for premise and LSTM for hypothesis have different parameters
    l_lstm_hypo = lasagne.layers.LSTMLayer(l_in_hypo_embedding, K_HIDDEN, 
        peepholes=False, grad_clipping=GRAD_CLIP, 
        nonlinearity=lasagne.nonlinearities.tanh, 
        cell_init=l_lstm_prem_out, mask_input=l_mask_hypo)
    l_lstm_hypo_dropout = lasagne.layers.DropoutLayer(l_lstm_hypo, p=DROPOUT_RATE, rescale=True)
    # Isolate the last hidden unit output
    l_hypo_out = lasagne.layers.SliceLayer(l_lstm_hypo_dropout, -1, axis=1)
    # A softmax layer create probability distribution of the prediction
    l_out = lasagne.layers.DenseLayer(l_hypo_out, num_units=NUM_LABELS,
        W=lasagne.init.Normal(), nonlinearity=lasagne.nonlinearities.softmax)

    # The output of the net
    network_output_train = lasagne.layers.get_output(l_out, deterministic=False)
    network_output_test = lasagne.layers.get_output(l_out, deterministic=True)

    # Theano tensor for the targets
    target_values = T.ivector('target_output')

    # The loss function is calculated as the mean of the cross-entropy
    cost = lasagne.objectives.categorical_crossentropy(network_output_train, target_values).mean()
    from lasagne.regularization import l2, regularize_layer_params
    l2_penalty = regularize_layer_params(l_out, l2) * REGU
    cost = cost + l2_penalty
    # Retrieve all parameters from the network
    all_params = lasagne.layers.get_all_params(l_out)

    # Compute ADAM updates for training
    print("Computing updates ...")
    # updates = lasagne.updates.adam(cost, all_params, learning_rate=LEARNING_RATE, beta1=0.9, beta2=0.999, epsilon=1e-08)
    updates = lasagne.updates.adam(cost, all_params, masks=[('EmbeddingLayer.W', embedding_w_mask)], learning_rate=LEARNING_RATE, beta1=0.9, beta2=0.999, epsilon=1e-08)

    """
    # Test
    test_prediction = lasagne.layers.get_output(l_out, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction, target_values).mean()
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                    dtype=theano.config.floatX)
    """

    # Theano functions for training and computing cost
    train_acc = T.mean(T.eq(T.argmax(network_output_test, axis=1), target_values), dtype=theano.config.floatX)
    print("Compiling functions ...")
    train = theano.function([l_in_prem.input_var, l_mask_prem.input_var, l_in_hypo.input_var, l_mask_hypo.input_var, target_values], [cost, train_acc], updates=updates, allow_input_downcast=True)

    # Theano function computing the validation loss and accuracy
    val_acc = T.mean(T.eq(T.argmax(network_output_test, axis=1), target_values), dtype=theano.config.floatX)
    validate = theano.function([l_in_prem.input_var, l_mask_prem.input_var, l_in_hypo.input_var, l_mask_hypo.input_var, target_values], [cost, val_acc], allow_input_downcast=True)

    print("Training ...")
    print('Regularization strength: ', REGU)
    print('Learning rate: ', LEARNING_RATE)
    print('Dropout rate: ', DROPOUT_RATE)
    print('Hidden size: ', K_HIDDEN)
    sys.stdout.flush()
    try:
        for epoch in range(NUM_EPOCHS):
            n = 0
            avg_cost = 0.0
            count = 0
            sub_epoch = 0
            train_acc = 0
            while n < TRAIN_SIZE:
                X_prem, X_prem_mask, X_hypo, X_hypo_mask, y = get_batch_data(n, data_train)
                err, acc = train(X_prem, X_prem_mask, X_hypo, X_hypo_mask, y)
                avg_cost += err
                train_acc += acc
                n += BATCH_SIZE
                count += 1

                if (n / BATCH_SIZE) % (TRAIN_SIZE / BATCH_SIZE / 5) == 0:
                    sub_epoch += 1
                    avg_cost /= count
                    print("Sub epoch {} average loss = {}, accuracy = {}".format(sub_epoch, avg_cost, train_acc / count * 100))
                    avg_cost = 0
                    count = 0
                    train_acc = 0


                    # Calculate validation accuracy
                    m = 0
                    val_err = 0
                    val_acc = 0
                    val_batches = 0
                    while m < VAL_SIZE:
                        X_prem, X_prem_mask, X_hypo, X_hypo_mask, y = get_batch_data(m, data_val)
                        err, acc = validate(X_prem, X_prem_mask, X_hypo, X_hypo_mask, y)
                        val_err += err
                        val_acc += acc
                        val_batches += 1
                        m += BATCH_SIZE
                        
                    print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
                    print("  validation accuracy:\t\t{:.2f} %".format(
                    val_acc / val_batches * 100))
                    sys.stdout.flush()
            

    except KeyboardInterrupt:
        pass
Example #57
0
File: lstm.py Project: Coderx7/CNN
def main(num_epochs=NUM_EPOCHS):
    print("Building network ...")
    # First, we build the network, starting with an input layer
    # Recurrent layers expect input of shape
    # (batch size, max sequence length, number of features)
    l_in = lasagne.layers.InputLayer(shape=(N_BATCH, WINDOW, 20))

    l_forward = lasagne.layers.LSTMLayer(
        l_in, N_HIDDEN, grad_clipping=GRAD_CLIP, only_return_final=True)
    l_backward = lasagne.layers.LSTMLayer(
        l_in, N_HIDDEN, grad_clipping=GRAD_CLIP, only_return_final=True, backwards=True)
    # Now, we'll concatenate the outputs to combine them.
    l_concat = lasagne.layers.ConcatLayer([l_forward, l_backward])
    # Our output layer is a simple dense connection, with 1 output unit
    l_out = lasagne.layers.DenseLayer(
        l_concat, num_units=3, nonlinearity=lasagne.nonlinearities.softmax)

    target_values = T.ivector('target_output')

    prediction = lasagne.layers.get_output(l_out)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_values)
    l1_penalty = regularize_layer_params(l_out, l1)
    loss = loss.mean() + lamda *  l1_penalty
    acc = T.mean(T.eq(T.argmax(prediction, axis=1), target_values),dtype=theano.config.floatX)

    all_params = lasagne.layers.get_all_params(l_out)
    LEARNING_RATE = .01
    print("Computing updates ...")
    updates = lasagne.updates.nesterov_momentum(loss, all_params,LEARNING_RATE,0.95)
    # Theano functions for training and computing cost
    print("Compiling functions ...")
    train = theano.function([l_in.input_var, target_values],
                            loss, updates=updates)
    valid = theano.function([l_in.input_var, target_values],
                            [loss, acc])
    accuracy = theano.function(
        [l_in.input_var, target_values],acc )

    result = theano.function([l_in.input_var],prediction)

    best_acc=0

    print("Training ...")
    try:
        for epoch in range(NUM_EPOCHS):
            if epoch % 50 == 49:
                LEARNING_RATE *= 0.5
                updates = lasagne.updates.nesterov_momentum(loss, all_params,LEARNING_RATE,0.95)
                train = theano.function([l_in.input_var, target_values],
                                        loss, updates=updates)
            train_err = 0
            train_batches = 0
            start_time = time.time()
            for batch in iterate_minibatches(train_data, train_label, N_BATCH, WINDOW):
                inputs, targets = batch
                train_err += train(inputs, targets)
                train_batches += 1

            val_err = 0
            val_acc = 0
            val_batches = 0
            for batch in iterate_minibatches(valid_data, valid_label, N_BATCH, WINDOW):
                inputs, targets = batch
                err, acc = valid(inputs, targets)
                val_err += err
                val_acc += acc
                val_batches += 1

            val_acc = val_acc / val_batches
            if val_acc > best_acc:
                best_acc = val_acc

            # Then we print the results for this epoch:
            print("Epoch {} of {} took {:.3f}s".format(
                epoch + 1, NUM_EPOCHS, time.time() - start_time))
            print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
            print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
            print("  validation accuracy:\t\t{:.2f} %".format(
                    val_acc * 100))
    except KeyboardInterrupt:
        pass
Example #58
0
def main(num_epochs=NUM_EPOCHS):
    print("Building network ...")
    # First, we build the network, starting with an input layer
    # Recurrent layers expect input of shape
    # (batch size, max sequence length, number of features)
    l_in = lasagne.layers.InputLayer(shape=(N_BATCH, WINDOW, 20))

    l_forward = lasagne.layers.LSTMLayer(l_in,
                                         N_HIDDEN,
                                         grad_clipping=GRAD_CLIP,
                                         only_return_final=True)
    l_backward = lasagne.layers.LSTMLayer(l_in,
                                          N_HIDDEN,
                                          grad_clipping=GRAD_CLIP,
                                          only_return_final=True,
                                          backwards=True)
    # Now, we'll concatenate the outputs to combine them.
    l_concat = lasagne.layers.ConcatLayer([l_forward, l_backward])
    # Our output layer is a simple dense connection, with 1 output unit
    l_out = lasagne.layers.DenseLayer(
        l_concat, num_units=3, nonlinearity=lasagne.nonlinearities.softmax)

    target_values = T.ivector('target_output')

    prediction = lasagne.layers.get_output(l_out)
    loss = lasagne.objectives.categorical_crossentropy(prediction,
                                                       target_values)
    l1_penalty = regularize_layer_params(l_out, l1)
    loss = loss.mean() + lamda * l1_penalty
    acc = T.mean(T.eq(T.argmax(prediction, axis=1), target_values),
                 dtype=theano.config.floatX)

    all_params = lasagne.layers.get_all_params(l_out)
    LEARNING_RATE = .01
    print("Computing updates ...")
    updates = lasagne.updates.nesterov_momentum(loss, all_params,
                                                LEARNING_RATE, 0.95)
    # Theano functions for training and computing cost
    print("Compiling functions ...")
    train = theano.function([l_in.input_var, target_values],
                            loss,
                            updates=updates)
    valid = theano.function([l_in.input_var, target_values], [loss, acc])
    accuracy = theano.function([l_in.input_var, target_values], acc)

    result = theano.function([l_in.input_var], prediction)

    best_acc = 0

    print("Training ...")
    try:
        for epoch in range(NUM_EPOCHS):
            if epoch % 50 == 49:
                LEARNING_RATE *= 0.5
                updates = lasagne.updates.nesterov_momentum(
                    loss, all_params, LEARNING_RATE, 0.95)
                train = theano.function([l_in.input_var, target_values],
                                        loss,
                                        updates=updates)
            train_err = 0
            train_batches = 0
            start_time = time.time()
            for batch in iterate_minibatches(train_data, train_label, N_BATCH,
                                             WINDOW):
                inputs, targets = batch
                train_err += train(inputs, targets)
                train_batches += 1

            val_err = 0
            val_acc = 0
            val_batches = 0
            for batch in iterate_minibatches(valid_data, valid_label, N_BATCH,
                                             WINDOW):
                inputs, targets = batch
                err, acc = valid(inputs, targets)
                val_err += err
                val_acc += acc
                val_batches += 1

            val_acc = val_acc / val_batches
            if val_acc > best_acc:
                best_acc = val_acc

            # Then we print the results for this epoch:
            print("Epoch {} of {} took {:.3f}s".format(
                epoch + 1, NUM_EPOCHS,
                time.time() - start_time))
            print("  training loss:\t\t{:.6f}".format(train_err /
                                                      train_batches))
            print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
            print("  validation accuracy:\t\t{:.2f} %".format(val_acc * 100))
    except KeyboardInterrupt:
        pass
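    # The `accuracy` function compiled above is never used; below is a sketch of a
    # final test-set pass with it, placed at the end of main() after the training
    # loop (test_data and test_label are assumed to exist in the same format as
    # valid_data and valid_label):
    test_acc = 0
    test_batches = 0
    for batch in iterate_minibatches(test_data, test_label, N_BATCH, WINDOW):
        inputs, targets = batch
        test_acc += accuracy(inputs, targets)
        test_batches += 1
    print("  test accuracy:\t\t{:.2f} %".format(test_acc / test_batches * 100))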