Example #1
def test_aggregate_weighted_mean():
    from lasagne.objectives import aggregate
    x = theano.tensor.matrix('x')
    w = theano.tensor.matrix('w')
    assert theano.gof.graph.is_same_graph(aggregate(x, w), (x * w).mean())
    assert theano.gof.graph.is_same_graph(aggregate(x, w, mode='mean'),
                                          (x * w).mean())
Example #2
def test_aggregate_weighted_mean():
    from lasagne.objectives import aggregate
    x = theano.tensor.matrix('x')
    w = theano.tensor.matrix('w')
    assert theano.gof.graph.is_same_graph(aggregate(x, w), (x * w).mean())
    assert theano.gof.graph.is_same_graph(aggregate(x, w, mode='mean'),
                                          (x * w).mean())
Example #3
def test_aggregate_invalid():
    from lasagne.objectives import aggregate
    with pytest.raises(ValueError) as exc:
        aggregate(theano.tensor.matrix(), mode='asdf')
    assert 'mode must be' in exc.value.args[0]
    with pytest.raises(ValueError) as exc:
        aggregate(theano.tensor.matrix(), mode='normalized_sum')
    assert 'require weights' in exc.value.args[0]
Example #4
def test_aggregate_invalid():
    from lasagne.objectives import aggregate
    with pytest.raises(ValueError) as exc:
        aggregate(theano.tensor.matrix(), mode='asdf')
    assert 'mode must be' in exc.value.args[0]
    with pytest.raises(ValueError) as exc:
        aggregate(theano.tensor.matrix(), mode='normalized_sum')
    assert 'require weights' in exc.value.args[0]
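For reference, a minimal sketch (not taken from the test suite) of the three aggregation modes the tests above exercise; 'mean' is the default, and 'normalized_sum' is the mode that requires weights:

import theano.tensor as T
from lasagne.objectives import squared_error, aggregate

prediction = T.matrix('prediction')
target = T.matrix('target')
weights = T.matrix('weights')

loss = squared_error(prediction, target)       # elementwise loss
mean_loss = aggregate(loss, mode='mean')       # default: loss.mean()
sum_loss = aggregate(loss, mode='sum')         # loss.sum()
norm_loss = aggregate(loss, weights,           # (loss * weights).sum() / weights.sum()
                      mode='normalized_sum')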
Example #5
        def objective(layers_, target, **kwargs):
            out_a_layer = layers_['output_a']
            out_b_layer = layers_['output_b']

            # Get the outputs
            out_a, out_b = get_output([out_a_layer, out_b_layer])

            # Get the targets
            gt_a = T.cast(target[:, 0], 'int32')
            gt_b = target[:, 1].reshape((-1, 1))

            # Calculate the multi task loss
            cls_loss = aggregate(categorical_crossentropy(out_a, gt_a))
            reg_loss = aggregate(categorical_crossentropy(out_b, gt_b))
            loss = cls_loss + reg_loss
            return loss
Example #6
def objective(layers,
              loss_function,
              target,
              aggregate=aggregate,
              deterministic=False,
              l1=0,
              l2=0,
              tv=0,
              get_output_kw=None):
    if get_output_kw is None:
        get_output_kw = {}
    output_layer = layers[-1]
    network_output = get_output(
        output_layer, deterministic=deterministic, **get_output_kw)
    loss = aggregate(loss_function(network_output, target))

    if l1:
        loss += regularization.regularize_layer_params(
            layers[-2], regularization.l1) * l1
    if l2:
        loss += regularization.regularize_layer_params(
            layers[-2], regularization.l2) * l2
    if tv:
        loss += T.mean(T.abs_(network_output[:, 1:] -
                              network_output[:, :-1]))*tv
    return loss
Example #7
def test_maxpool_layer():
    l_in1 = InputLayer((None, 2))
    l_in2 = InputLayer((None, 20))
    l_hid = DenseLayer(l_in2, num_units=30, nonlinearity=rectify)
    l_pool = MaxpoolLayer([l_in1, l_hid])
    l_out = DenseLayer(l_pool, num_units=1, nonlinearity=sigmoid)

    bounds = theano.tensor.lmatrix('bounds')
    data = theano.tensor.matrix('data')
    targets = theano.tensor.matrix('targets')

    predictions = get_output(l_out, {l_in1: bounds, l_in2: data})
    loss = categorical_crossentropy(predictions, targets)
    loss = aggregate(loss, mode='mean')

    params = get_all_params(l_out)
    updates_sgd = sgd(loss, params, learning_rate=0.0001)

    train_function = theano.function([bounds, data, targets], updates=updates_sgd, allow_input_downcast=True)

    test_bounds = np.array([[0, 3], [3, 5], [5, 7]])
    test_X = np.random.randn(10, 20)
    test_Y = np.array([[0], [1], [0]])

    train_function(test_bounds, test_X, test_Y)
Example #8
def objective(layers, loss_function, target, aggregate=aggregate, deterministic=False, get_output_kw=None):
    if get_output_kw is None:
        get_output_kw = {}
    output_layer = layers[-1]
    network_output = get_output(output_layer, deterministic=deterministic, **get_output_kw)
    losses = loss_function(network_output, target)
    return aggregate(losses)
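A hedged usage sketch (not from the source project), assuming the objective defined in Example #8 and its imports are in scope: a toy two-layer network whose per-sample cross-entropy is reduced to a scalar by the default aggregate.

import theano
import theano.tensor as T
from lasagne.layers import InputLayer, DenseLayer
from lasagne.nonlinearities import softmax
from lasagne.objectives import categorical_crossentropy

x = T.matrix('x')
t = T.ivector('t')
l_in = InputLayer((None, 10), input_var=x)
l_out = DenseLayer(l_in, num_units=3, nonlinearity=softmax)

# objective() only uses layers[-1], so a plain list of layers suffices here.
loss = objective([l_in, l_out], categorical_crossentropy, t)
loss_fn = theano.function([x, t], loss)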
Example #9
def objective(layers,
              loss_function,
              target,
              aggregate=aggregate,
              aggregation_weights=None,
              deterministic=False,
              l1=0,
              l2=0,
              get_output_kw=None):
    if get_output_kw is None:
        get_output_kw = {}

    output_layer = layers[-1]
    network_output = get_output(
        output_layer, deterministic=deterministic, **get_output_kw)
    if isfunction(aggregation_weights):
        weights = aggregation_weights(layers)
    else:
        weights = aggregation_weights
    loss = aggregate(loss_function(network_output, target), weights)

    if l1:
        loss += regularization.regularize_layer_params(
            layers.values(), regularization.l1) * l1
    if l2:
        loss += regularization.regularize_layer_params(
            layers.values(), regularization.l2) * l2
    return loss
Example #10
        def objective(layers_, target, **kwargs):
            out_a_layer = layers_['output_a']
            out_b_layer = layers_['output_b']

            # Get the outputs
            out_a, out_b = get_output([out_a_layer, out_b_layer])

            # Get the targets
            gt_a = T.cast(target[:, 0], 'int32')
            gt_b = target[:, 1].reshape((-1, 1))

            # Calculate the multi task loss
            cls_loss = aggregate(categorical_crossentropy(out_a, gt_a))
            reg_loss = aggregate(categorical_crossentropy(out_b, gt_b))
            loss = cls_loss + reg_loss
            return loss
Example #11
def objective(layers,
              loss_function,
              target,
              aggregate=aggregate,
              aggregation_weights=None,
              deterministic=False,
              l1=0,
              l2=0,
              get_output_kw=None):
    if get_output_kw is None:
        get_output_kw = {}

    output_layer = layers[-1]
    network_output = get_output(output_layer,
                                deterministic=deterministic,
                                **get_output_kw)
    if isfunction(aggregation_weights):
        weights = aggregation_weights(layers)
    else:
        weights = aggregation_weights
    loss = aggregate(loss_function(network_output, target), weights)

    if l1:
        loss += regularization.regularize_layer_params(layers.values(),
                                                       regularization.l1) * l1
    if l2:
        loss += regularization.regularize_layer_params(layers.values(),
                                                       regularization.l2) * l2
    return loss
Example #12
def objective(layers,
              loss_function,
              target,
              aggregate=aggregate,
              mode='mean',
              weights=None,
              deterministic=False,
              l1=0,
              l2=0,
              l3=0,
              l3_layers=[],
              get_output_kw=None):
    if get_output_kw is None:
        get_output_kw = {}
    output_layer = layers[-1]
    network_output = get_output(
        output_layer, deterministic=deterministic, **get_output_kw)
    loss = aggregate(loss_function(network_output, target), weights=weights, mode=mode)

    if l1:
        loss += regularization.regularize_layer_params(
            layers.values(), regularization.l1) * l1
    if l2:
        loss += regularization.regularize_layer_params(
            layers.values(), regularization.l2) * l2
    if l3:
        for layer in l3_layers:
            loss += regularization.regularize_layer_params(
                layer, regularization.l2) * l3
    return loss
Example #13
def objective(layers,
              loss_function,
              target,
              aggregate=aggregate,
              deterministic=False,
              l1=0,
              l2=0,
              get_output_kw=None):
    """
    Default implementation of the NeuralNet Objective.
    :param layers: The underlying layers of the NeuralNetwork
    :param loss_function: The callable loss function to use
    :param target: the expected output

    :param aggregate: the aggregation function to use
    :param deterministic: Whether or not to get a deterministic output
    :param l1: Optional l1 regularization parameter
    :param l2: Optional l2 regularization parameter
    :param get_output_kw: optional kwargs to pass to :meth:`NeuralNetwork.get_output`
    :return: The total calculated loss
    """
    if get_output_kw is None:
        get_output_kw = {}
    output_layer = layers[-1]
    network_output = get_output(
        output_layer, deterministic=deterministic, **get_output_kw)
    loss = aggregate(loss_function(network_output, target))

    if l1:
        loss += regularization.regularize_layer_params(
            layers.values(), regularization.l1) * l1
    if l2:
        loss += regularization.regularize_layer_params(
            layers.values(), regularization.l2) * l2
    return loss
Example #14
def bc_with_ranking(y_prediction, y_true):
    """ Trying to combine ranking loss with numeric precision"""
    # first get the log loss like normal
    #logloss = aggregate(T.nnet.binary_crossentropy(y_pred, y_true))
    
    # clip the probabilities to keep stability
    y_pred_clipped = T.clip(y_prediction, _EPSILON, 1.-_EPSILON)

    logloss = aggregate(T.nnet.categorical_crossentropy(y_prediction, y_true))
    
    y_pred = y_pred_clipped[:,1]
    # next, build a rank loss

    # translate into the raw scores before the logit
    y_pred_score = T.log(y_pred / (1. - y_pred))


    # determine what the maximum score for a zero outcome is
    max_zerooutcome = T.max(y_pred_score * (y_true <1.))

    mean_oneoutcome = T.mean(y_pred_score * (y_true > 0.1))

    border = ifelse(T.gt(max_zerooutcome, mean_oneoutcome), mean_oneoutcome, max_zerooutcome)

    # determine how much each score is above or below it
    rankloss = y_pred_score - border

    # only keep losses for positive outcomes
    rankloss = rankloss * y_true

    # only keep losses where the score is below the max
    rankloss = T.sqr(T.clip(rankloss, -100., 0.))

    # average the loss for just the positive outcomes
    rankloss = T.sum(rankloss) / (T.sum(y_true > 0.1) + 1.)



    # determine what the minimum score for a one outcome is
    min_oneoutcome = T.min(y_pred_score * (y_true > 0.1))

    mean_zerooutcome = T.mean(y_pred_score * (y_true < 1.))

    border = ifelse(T.lt(min_oneoutcome, mean_zerooutcome), mean_zerooutcome, min_oneoutcome)

    # determine how much each score is above or below it
    rankloss_ = y_pred_score - border

    # only keep losses for negative (zero) outcomes
    rankloss_ = rankloss_ * (1. - y_true)

    # only keep losses where the score is above the border
    rankloss_ = T.sqr(T.clip(rankloss_, 0., 100.))

    # average the loss for just the negative outcomes
    rankloss_ = T.sum(rankloss_, axis=0) / (T.sum(y_true < 1.) + 1.)

    # return (rankloss + 1) * logloss - an alternative to try
    #return rankloss + logloss
    return 0.01*rankloss_ + 0.01*rankloss + logloss
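bc_with_ranking relies on a few names defined elsewhere in its original module (T, ifelse, aggregate, _EPSILON). A plausible, assumed preamble is sketched below; the actual _EPSILON value in the source is not shown, so the constant here is illustrative.

import theano.tensor as T
from theano.ifelse import ifelse
from lasagne.objectives import aggregate

_EPSILON = 1e-7  # assumed clipping constant; the original value is unknown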
Example #15
    def build_model(self, train_set, test_set, validation_set=None):
        super(CAE, self).build_model(train_set, test_set, validation_set)

        y_train = get_output(self.model, self.sym_x)
        loss = aggregate(squared_error(y_train, self.sym_x), mode='mean')
        loss += 1e-4 * lasagne.regularization.regularize_network_params(
            self.model, lasagne.regularization.l2)

        y_test = get_output(self.model, self.sym_x, deterministic=True)
        loss_test = aggregate(squared_error(y_test, self.sym_x), mode='mean')

        grads_collect = T.grad(loss, self.trainable_model_params)
        sym_beta1 = T.scalar('beta1')
        sym_beta2 = T.scalar('beta2')
        clip_grad, max_norm = 1, 5
        mgrads = total_norm_constraint(grads_collect, max_norm=max_norm)
        mgrads = [T.clip(g, -clip_grad, clip_grad) for g in mgrads]
        updates = adam(mgrads, self.trainable_model_params, self.sym_lr,
                       sym_beta1, sym_beta2)

        # Training function
        x_batch = self.sh_train_x[self.batch_slice]

        givens = {self.sym_x: x_batch}
        inputs = [
            self.sym_index, self.sym_batchsize, self.sym_lr, sym_beta1,
            sym_beta2
        ]
        outputs = [loss]
        f_train = theano.function(inputs=inputs,
                                  outputs=outputs,
                                  givens=givens,
                                  updates=updates)

        # Validation and test function
        givens = {self.sym_x: self.sh_test_x}
        f_test = theano.function(inputs=[], outputs=[loss_test], givens=givens)

        self.train_args['inputs']['batchsize'] = 128
        self.train_args['inputs']['learningrate'] = 1e-3
        self.train_args['inputs']['beta1'] = 0.9
        self.train_args['inputs']['beta2'] = 1e-6
        self.train_args['outputs']['loss'] = '%0.6f'

        self.test_args['outputs']['loss_test'] = '%0.6f'

        return f_train, f_test, None, self.train_args, self.test_args, self.validate_args
Example #16
 def _get_loss_function(self):
     # TODO: remove `or True`
     if self._loss is None:
         if self._regression:
             self._loss = squared_error
         else:
             self._loss = categorical_crossentropy
     return aggregate(self._loss(self._get_output(), self.t_label), mode='mean')
Example #17
 def __call__(self,layers,
           loss_function,
           target,
           aggregate=T.mean,
           **kwargs):
     output_layer = layers[-1]
     network_output = get_output(output_layer, **kwargs)
     return -aggregate(self.auc_error(network_output[:,1], target))
Example #18
 def _get_loss_function(self):
     # TODO: remove `or True`
     if self._loss is None:
         if self._regression:
             self._loss = squared_error
         else:
             self._loss = categorical_crossentropy
     return aggregate(self._loss(self._get_output(), self.t_label), mode='mean')
Example #19
File: ctc.py  Project: choko/ctc
	def compute_cost(rnn_outputs, forward_probabilities, backward_pointers, x_end, y_end, label):
		def backward_step(backlinks, position):
			new_position = backlinks[position]
			return new_position, position

		initial_state = T.argmax(forward_probabilities[x_end-1,y_end-2:y_end]) + y_end - 2

		results, _ = theano.scan(fn = backward_step, sequences = backward_pointers[0:x_end,:], outputs_info = [initial_state, None], go_backwards = True)
		alignment = label[results[1][::-1]]

		return aggregate(categorical_crossentropy(rnn_outputs[0:x_end], alignment), mode='sum')
Example #20
    def __init__(self,
                 num_features,
                 num_layers,
                 num_nodes,
                 dropout,
                 learning_rate,
                 weight_decay,
                 verbose=False):
        self.verbose = verbose
        self.input_var = T.matrix('inputs')
        self.target_var = T.ivector('targets')

        self.num_features = num_features
        self.num_layers = num_layers
        self.num_nodes = num_nodes
        self.dropout = dropout
        self.network = self.build_network()

        self.prediction = lasagne.layers.get_output(self.network,
                                                    deterministic=True)
        self.predict_function = theano.function([self.input_var],
                                                self.prediction,
                                                allow_input_downcast=True)

        self.loss = categorical_crossentropy(self.prediction, self.target_var)
        self.loss = aggregate(self.loss, mode='mean')

        if not os.path.exists('models'):
            os.mkdir('models')

        # L2 regularization with weight decay
        weightsl2 = lasagne.regularization.regularize_network_params(
            self.network, lasagne.regularization.l2)
        weightsl1 = lasagne.regularization.regularize_network_params(
            self.network, lasagne.regularization.l1)
        self.loss += weight_decay * weightsl2  #+ 1e-5*weightsl1

        # Adagrad training (Adam / Nesterov momentum alternatives commented out below)
        params = lasagne.layers.get_all_params(self.network, trainable=True)
        updates = lasagne.updates.adagrad(self.loss,
                                          params,
                                          learning_rate=learning_rate)
        #updates = lasagne.updates.adam(self.loss, params)
        #updates = lasagne.updates.nesterov_momentum(self.loss, params,
        #                learning_rate=learning_rate, momentum=momentum)

        self.train = theano.function([self.input_var, self.target_var],
                                     self.loss,
                                     updates=updates)

        self.create_test_function()
        self.create_bayes_test_function()
Example #21
def get_network(model):

    input_data = tensor.dmatrix('x')
    targets_var = tensor.dmatrix('y')

    network = layers.InputLayer((model['batch_size'], model['input_vars']), input_data)

    nonlin = nonlinearities.rectify
    if model['hidden_nonlinearity'] != 'ReLu':
        nonlin = nonlinearities.tanh

    prev_layer = network

    for l in range(model['nlayers']):
        fc = layers.DenseLayer(prev_layer, model['units'], nonlinearity=nonlin)
        if model['dropout']:
            fc = layers.DropoutLayer(fc, 0.5)
        prev_layer = fc

    output_lin = None
    if model['output_mode'] == OUTPUT_LOG:
        output_lin = nonlinearities.tanh
    output_layer = layers.DenseLayer(prev_layer, 1, nonlinearity=output_lin)

    predictions = layers.get_output(output_layer)

    if model['output_mode'] == OUTPUT_BOUNDED:
        (minth, maxth) = model['maxmin'][model['control']]
        maxt = theano.shared(np.ones((model['batch_size'], 1)) * maxth)
        mint = theano.shared(np.ones((model['batch_size'], 1)) * minth)
        predictions = tensor.min(tensor.concatenate([maxt, predictions], axis=1), axis=1)
        predictions = tensor.reshape(predictions, (model['batch_size'], 1))
        predictions = tensor.max(tensor.concatenate([mint, predictions], axis=1), axis=1)
        predictions = tensor.reshape(predictions, (model['batch_size'], 1))

    loss = objectives.squared_error(predictions, targets_var)
    loss = objectives.aggregate(loss, mode='mean')

    params = layers.get_all_params(output_layer)

    test_prediction = layers.get_output(output_layer, deterministic=True)
    test_loss = objectives.squared_error(test_prediction,  targets_var)
    test_loss = test_loss.mean()

    updates_sgd = updates.sgd(loss, params, learning_rate=model['lr'])
    ups = updates.apply_momentum(updates_sgd, params, momentum=0.9)

    train_fn = theano.function([input_data, targets_var], loss, updates=ups)
    pred_fn = theano.function([input_data], predictions)
    val_fn = theano.function([input_data, targets_var], test_loss)

    return {'train': train_fn, 'eval': val_fn, 'pred': pred_fn, 'layers': output_layer}
Example #22
def objective(layers,
              loss_function,
              target,
              aggregate=aggregate,
              deterministic=False,
              get_output_kw=None):
    if get_output_kw is None:
        get_output_kw = {}
    output_layer = layers[-1]
    network_output = get_output(output_layer,
                                deterministic=deterministic,
                                **get_output_kw)
    losses = loss_function(network_output, target)
    return aggregate(losses)
Example #23
def build_loss(targets, prediction, optimization):
	""" setup loss function with weight decay regularization """

	if optimization["objective"] == 'categorical':
		loss = objectives.categorical_crossentropy(prediction, targets)

	elif optimization["objective"] == 'binary':
		prediction = T.clip(prediction, 1e-7, 1-1e-7)
		loss = -(targets*T.log(prediction) + (1.0-targets)*T.log(1.0-prediction))
		# loss = objectives.binary_crossentropy(prediction[:,loss_index], targets[:,loss_index])

	elif (optimization["objective"] == 'squared_error'):
		loss = objectives.squared_error(prediction, targets)

	loss = objectives.aggregate(loss, mode='mean')

	return loss
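A hedged usage sketch (assumed, not from the source): build_loss expects an optimization dict with an "objective" key, for instance:

import theano.tensor as T

targets = T.matrix('targets')
prediction = T.matrix('prediction')
optimization = {"objective": "squared_error"}   # or 'categorical' / 'binary'
loss = build_loss(targets, prediction, optimization)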
Example #24
 def objective(layers, loss_function, target, aggregate=aggregate,
               deterministic=False, get_output_kw=None):
     if get_output_kw is None:
         get_output_kw = {}
     output_layer = layers[-1]
     first_layer = layers[1]
     network_output = lasagne.layers.get_output(
         output_layer, deterministic=deterministic, **get_output_kw)
     if not deterministic:
         losses = loss_function(network_output, target) \
                 + l2 * regularization.regularize_network_params(
                     output_layer, regularization.l2) \
                 + l1 * regularization.regularize_layer_params(
                     first_layer, regularization.l1)
     else:
         losses = loss_function(network_output, target)
     return aggregate(losses)
Example #25
 def get_functions():
 
     input_layer=layers.InputLayer(shape=(BATCH_SIZE, INPUT_LENGTH))
     print "input_layer size: " + str(input_layer.shape[0])+","+ str(input_layer.shape[1])
     layer = input_layer
 
     for layer_num in range(len(NUM_UNITS_HIDDEN_LAYER)):
         print "layer_num-"+str(layer_num)
         layer=layers.DenseLayer(layer,
                                    num_units=NUM_UNITS_HIDDEN_LAYER[layer_num],
                                    W=lasagne.init.Normal(0.01),
                                    nonlinearity=nonlinearities.tanh)
 
 
     output_layer=layers.DenseLayer(layer,
                                    num_units=OUTPUT_SIZE,
                                    nonlinearity=nonlinearities.softmax)
 
 
     network_output=get_output(output_layer)
     expected_output=T.ivector()
 
 
     loss_train=aggregate(categorical_crossentropy(network_output, expected_output), mode='mean')
 
     all_weigths=layers.get_all_params(output_layer)
 
     update_rule=lasagne.updates.nesterov_momentum(loss_train, all_weigths, learning_rate=LEARNING_RATE)
     
     print "input_layer_end size: " + str(input_layer.shape[0])+","+ str(input_layer.shape[1])
     train_function=theano.function(inputs=[input_layer.input_var, expected_output],
                                    outputs=loss_train,
                                    updates=update_rule,
                                    allow_input_downcast=True)
 
     prediction = T.argmax(network_output, axis=1)
     accuracy = T.mean(T.eq(prediction, expected_output), dtype=theano.config.floatX)  # @UndefinedVariable
 
     test_function=theano.function(inputs=[input_layer.input_var, expected_output],
                                   outputs=[loss_train, accuracy, prediction],
                                   allow_input_downcast=True)
     
     output_function=theano.function([input_layer.input_var],get_output(output_layer),
                                   allow_input_downcast=True)
 
     return train_function,test_function,output_function
Example #26
    def compute_cost(rnn_outputs, forward_probabilities, backward_pointers,
                     x_end, y_end, label):
        def backward_step(backlinks, position):
            new_position = backlinks[position]
            return new_position, position

        initial_state = T.argmax(
            forward_probabilities[x_end - 1, y_end - 2:y_end]) + y_end - 2

        results, _ = theano.scan(fn=backward_step,
                                 sequences=backward_pointers[0:x_end, :],
                                 outputs_info=[initial_state, None],
                                 go_backwards=True)
        alignment = label[results[1][::-1]]

        return aggregate(categorical_crossentropy(rnn_outputs[0:x_end],
                                                  alignment),
                         mode='sum')
Example #27
def grad_supervised(l_ram, labels):
    """
    return:
      loss = 1 / M * sum_i_{1..M} cross_entropy_loss(groundtruth, a_T)
      grads = theano.grad(loss, params)
    inputs:
      labels = (n_batch,)
          [theano tensor variable]
    """
    loc_mean_t, loc_t, h_t, prob, pred = lasagne.layers.get_output(l_ram)
    params = lasagne.layers.get_all_params(l_ram, trainable=True)

    ### loss estimation (cross entropy loss)
    loss = categorical_crossentropy(prob, labels)
    loss = aggregate(loss, mode='mean')

    ### gradient estimation
    grads = theano.grad(loss, params, disconnected_inputs='ignore')

    return loss, grads
Example #28
def weight_decay_objective(layers,
                        loss_function,
                        target,
                        penalty_conv=1e-8,
                        penalty_conv_type = l2,
                        penalty_output=1e-8,
                        penalty_output_type = l2,
                        aggregate=aggregate,
                        deterministic=False,
                        get_output_kw={}):
    '''
    Defines L2 weight decay on network weights. 
    '''
    net_out = get_output(layers[-1], deterministic=deterministic,
                        **get_output_kw)
    loss = loss_function(net_out, target)
    p1 = penalty_conv * regularize_layer_params(layers[1], penalty_conv_type)
    p2 = penalty_output * regularize_layer_params(layers[-1], penalty_output_type)
    losses = loss + p1 + p2
    return aggregate(losses)
Example #29
def grad_supervised(l_ram, labels):
    """
    return:
      loss = 1 / M * sum_i_{1..M} cross_entropy_loss(groundtruth, a_T)
      grads = theano.grad(loss, params)
    inputs:
      labels = (n_batch,)
          [theano tensor variable]
    """
    loc_mean_t, loc_t, h_t, prob, pred = lasagne.layers.get_output(l_ram)
    params = lasagne.layers.get_all_params(l_ram, trainable=True)

    ### loss estimation (cross entropy loss)
    loss = categorical_crossentropy(prob, labels)
    loss = aggregate(loss, mode='mean')

    ### gradient estimation
    grads = theano.grad(loss, params, disconnected_inputs='ignore')

    return loss, grads
Example #30
def objective(
    output_layer,
    regularize_layers,
    target,
    loss_function=squared_error,
    aggregate=aggregate,
    deterministic=False,
    l1=0,
    l2=0,
    tv=0,
):
    network_output = layers.get_output(output_layer, deterministic=deterministic)
    loss = aggregate(loss_function(network_output, target))
    for layer in regularize_layers:
        if l1:
            loss += regularization.regularize_layer_params(layer, regularization.l1) * l1
        if l2:
            loss += regularization.regularize_layer_params(layer, regularization.l2) * l2
    if tv:
        loss += T.mean(T.abs_(network_output[:, 1:] - network_output[:, :-1])) * tv
    return loss
Example #31
def weight_decay_objective(layers,
                           loss_function,
                           target,
                           penalty_conv=1e-8,
                           penalty_conv_type=l2,
                           penalty_output=1e-8,
                           penalty_output_type=l2,
                           aggregate=aggregate,
                           deterministic=False,
                           get_output_kw={}):
    '''
    Defines L2 weight decay on network weights. 
    '''
    net_out = get_output(layers[-1],
                         deterministic=deterministic,
                         **get_output_kw)
    loss = loss_function(net_out, target)
    p1 = penalty_conv * regularize_layer_params(layers[1], penalty_conv_type)
    p2 = penalty_output * regularize_layer_params(layers[-1],
                                                  penalty_output_type)
    losses = loss + p1 + p2
    return aggregate(losses)
Example #32
def objective(layers,
              loss_function,
              target,
              aggregate=aggregate,
              deterministic=False,
              l1=0,
              l2=0,
              get_output_kw=None):
    """
    Default implementation of the NeuralNet objective.

    :param layers: The underlying layers of the NeuralNetwork
    :param loss_function: The callable loss function to use
    :param target: the expected output

    :param aggregate: the aggregation function to use
    :param deterministic: Whether or not to get a deterministic output
    :param l1: Optional l1 regularization parameter
    :param l2: Optional l2 regularization parameter
    :param get_output_kw: optional kwargs to pass to
                          :meth:`NeuralNetwork.get_output`
    :return: The total calculated loss
    """
    if get_output_kw is None:
        get_output_kw = {}
    output_layer = layers[-1]
    network_output = get_output(output_layer,
                                deterministic=deterministic,
                                **get_output_kw)
    loss = aggregate(loss_function(network_output, target))

    if l1:
        loss += regularization.regularize_layer_params(layers.values(),
                                                       regularization.l1) * l1
    if l2:
        loss += regularization.regularize_layer_params(layers.values(),
                                                       regularization.l2) * l2
    return loss
Example #33
def test_aggregate_sum():
    from lasagne.objectives import aggregate
    x = theano.tensor.matrix('x')
    assert theano.gof.graph.is_same_graph(aggregate(x, mode='sum'), x.sum())
Example #34
def train(options):
    # -------- setup options and data ------------------
    np.random.seed(options['seed'])

    # Load options
    host = socket.gethostname() # get computer hostname
    start_time = datetime.datetime.now().strftime("%y-%m-%d-%H-%M")

    model = importlib.import_module(options['model_file'])

    # ---------- build model and compile ---------------
    input_batch = T.tensor4() # input image sequences
    target = T.tensor4() # target image

    print('Build model...')
    model = model.Model(**options['modelOptions'])

    print('Compile ...')
    net, outputs, filters = model.build_model(input_batch)

    # compute loss
    outputs = get_output(outputs + [filters])
    output_frames = outputs[:-1]
    output_filter = outputs[-1]

    train_losses = []
    for i in range(options['modelOptions']['target_seqlen']):
        output_frame = output_frames[i]

        if options['loss'] == 'squared_error':
            frame_loss = squared_error(output_frame, target[:, [i], :, :])
        elif options['loss'] == 'binary_crossentropy':
            # Clipping to avoid NaN's in binary crossentropy: https://github.com/Lasagne/Lasagne/issues/436
            output_frame = T.clip(output_frame, np.finfo(np.float32).eps, 1-np.finfo(np.float32).eps)
            frame_loss = binary_crossentropy(output_frame, target[:,[i],:,:])
        else:
            assert False

        train_losses.append(aggregate(frame_loss))

    train_loss = sum(train_losses) / options['modelOptions']['target_seqlen']

    # update
    sh_lr = theano.shared(lasagne.utils.floatX(options['learning_rate'])) # to allow dynamic learning rate

    layers = get_all_layers(net)
    all_params = get_all_params(layers, trainable = True)
    updates = adam(train_loss, all_params, learning_rate=sh_lr)
    _train = theano.function([input_batch, target], train_loss, updates=updates, allow_input_downcast=True)
    _test = theano.function([input_batch, target], [train_loss, output_filter] + output_frames, allow_input_downcast=True)

    # ------------ data setup ----------------
    print('Prepare data...')
    dataset = importlib.import_module(options['dataset_file'])
    dh = dataset.DataHandler(**options['datasetOptions'])

    # ------------ training setup ----------------
    if options['pretrained_model_path'] is not None:
        checkpoint = pickle.load(open(options['pretrained_model_path'], 'rb'))
        model_values = checkpoint['model_values'] # overwrite the values of model parameters
        lasagne.layers.set_all_param_values(layers, model_values)

        history_train = checkpoint['history_train']
        start_epoch = checkpoint['epoch'] + 1
        options['batch_size'] = checkpoint['options']['batch_size']
        sh_lr.set_value(floatX(checkpoint['options']['learning_rate']))
    else:
        start_epoch = 0
        history_train = []

    # ------------ actual training ----------------
    print('Start training ...')

    input_seqlen = options['modelOptions']['input_seqlen']
    for epoch in range(start_epoch, start_epoch + options['num_epochs']):
        epoch_start_time = time.time()

        history_batch = []
        for batch_index in range(0, options['batches_per_epoch']):

            batch = dh.GetBatch() # generate data on the fly
            if options['dataset_file'] == 'datasets.stereoCarsColor':
                batch_input = batch[..., :input_seqlen].squeeze(axis=4)  # first frames
                batch_target = batch[..., input_seqlen:].squeeze(axis=4)  # last frame
            else:
                batch_input = batch[..., :input_seqlen].transpose(0,4,2,3,1).squeeze(axis=4) # first frames
                batch_target = batch[..., input_seqlen:].transpose(0,4,2,3,1).squeeze(axis=4) # last frame

            # train
            loss_train = _train(batch_input, batch_target)
            history_batch.append(loss_train)

            print("Epoch {} of {}, batch {} of {}, took {:.3f}s".format(epoch + 1, options['num_epochs'], batch_index+1, options['batches_per_epoch'], time.time() - epoch_start_time))
            print("  training loss:\t{:.6f}".format(loss_train.item()))

        # clear the screen
        display.clear_output(wait=True)

        # print statistics
        history_train.append(np.mean(history_batch))
        history_batch = []
        print("Epoch {} of {}, took {:.3f}s".format(epoch + 1, options['num_epochs'], time.time() - epoch_start_time))
        print("  training loss:\t{:.6f}".format(history_train[epoch].item()))

        # set new learning rate (maybe this is unnecessary with adam updates)
        if (epoch+1) % options['decay_after'] == 0:
            options['learning_rate'] = sh_lr.get_value() * 0.5
            print "New LR:", options['learning_rate']
            sh_lr.set_value(floatX(options['learning_rate']))

        # save the model
        if (epoch+1) % options['save_after'] == 0:
            save_model(layers, epoch, history_train, start_time, host, options)
            print("Model saved")
Example #35
def modifiedObjective(layers,
                      loss_function,
                      target,
                      aggregate=aggregate,
                      deterministic=False,
                      l1=0,
                      l2=0,
                      logitSens=0,
                      probSens=0,
                      lossSens=0,
                      std=None,
                      get_output_kw=None):
    """
    Modified implementation of the NeuralNet objective.

    :param layers: The underlying layers of the NeuralNetwork
    :param loss_function: The callable loss function to use
    :param target: the expected output
    :param aggregate: the aggregation function to use
    :param deterministic: Whether or not to get a deterministic output
    :param l1: Optional l1 regularization parameter
    :param l2: Optional l2 regularization parameter
    :param logitSens: Optional logit sensitivity regularization parameter
    :param probSens: Optional probability sensitivity regularization parameter
    :param lossSens: Optional loss sensitivity regularization parameter
    :param get_output_kw: optional kwargs to pass to
                          :meth:`NeuralNetwork.get_output`
    :return: The total calculated loss
    """
    if get_output_kw is None:
        get_output_kw = {}
    output_layer = layers[-1]
    logit_layer = layers[-2]
    input_layer = layers[0]
    network_input = input_layer.input_var
    network_output = get_output(output_layer,
                                deterministic=deterministic,
                                **get_output_kw)
    logit_output = get_output(logit_layer,
                              deterministic=deterministic,
                              **get_output_kw)

    L = loss_function(
        network_output,
        lasagne.utils.one_hot(target, output_layer.output_shape[1]))
    loss = aggregate(L)

    if l1:
        loss += regularization.regularize_layer_params(layers.values(),
                                                       regularization.l1) * l1

    if l2:
        loss += regularization.regularize_layer_params(layers.values(),
                                                       regularization.l2) * l2

    # logit sensitivity
    if logitSens:
        logit = T.sum(
            logit_output *
            lasagne.utils.one_hot(target, output_layer.output_shape[1]),
            axis=1)
        G_logit = T.grad(T.sum(logit), network_input)

        if std is not None:
            G_logit = std * G_logit

        # Sparse saliency regularization
        absG_logit = T.abs_(G_logit)
        sumAbsG_logit = T.sum(absG_logit, axis=(1, 2, 3))
        loss += aggregate(sumAbsG_logit) * logitSens

    # probability sensitivity
    if probSens:
        prob = T.sum(
            network_output *
            lasagne.utils.one_hot(target, output_layer.output_shape[1]),
            axis=1)
        G_prob = T.grad(T.sum(prob), network_input)

        if std is not None:
            G_prob = std * G_prob

        # Sparse saliency regularization
        absG_prob = T.abs_(G_prob)
        sumAbsG_prob = T.sum(absG_prob, axis=(1, 2, 3))
        loss += aggregate(sumAbsG_prob) * probSens

    # Loss sensitivity
    if lossSens:
        G_loss = theano.grad(T.sum(L), network_input)
        if std is not None:
            G_loss = std * G_loss
        absG_loss = T.abs_(G_loss)
        loss += aggregate(T.sum(absG_loss, axis=(1, 2, 3))) * lossSens

        # Double Backpropagation, uncomment if desired
        #sqG = G**2
        #sumSqG = T.sum(sqG,axis = (1,2,3))
        #loss += aggregate(sumSqG) * tv
    return loss
Example #36
def test_aggregate_weighted_normalized_sum():
    from lasagne.objectives import aggregate
    x = theano.tensor.matrix('x')
    w = theano.tensor.matrix('w')
    assert theano.gof.graph.is_same_graph(aggregate(x, w, 'normalized_sum'),
                                          (x * w).sum() / w.sum())
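A hedged sketch (not from the test suite) of how the weighted 'normalized_sum' mode might be used in practice, e.g. to mask out padded samples; the names are illustrative.

import theano.tensor as T
from lasagne.objectives import categorical_crossentropy, aggregate

predictions = T.matrix('predictions')   # (batch, num_classes) softmax outputs
targets = T.ivector('targets')          # integer class labels
mask = T.vector('mask')                 # 1.0 for real samples, 0.0 for padding

loss = categorical_crossentropy(predictions, targets)
# Equivalent to (loss * mask).sum() / mask.sum(), matching the graph asserted above
masked_loss = aggregate(loss, weights=mask, mode='normalized_sum')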
Example #37
 def loss(x, t):
     return aggregate(binary_crossentropy(x, t))
Example #38
def run_network(data=None, num_epochs=10, ratio=0.5):
    try:

        global_start_time = time()
        sequence_length = 50
        batchsize = 512
        path_to_dataset = 'household_power_consumption.txt'

        # Loading the data

        if data is None:
            print('Loading data... ')
            X_train, y_train, X_test, y_test = data_power_consumption(
                path_to_dataset, sequence_length, ratio)
        else:
            X_train, y_train, X_test, y_test = data

        val_ratio = 0.005
        val_rows = round(val_ratio * X_train.shape[0])

        X_val = X_train[:val_rows]
        y_val = y_train[:val_rows]
        y_val = np.reshape(y_val, (y_val.shape[0], 1))
        X_train = X_train[val_rows:]
        y_train = y_train[val_rows:]
        

        # Creating the Theano variables
        input_var = T.tensor3('inputs')
        target_var = T.matrix('targets')

        # Building the Theano expressions on these variables
        network = build_model(input_var)

        prediction = lasagne.layers.get_output(network)
        loss = squared_error(prediction, target_var)
        loss = aggregate(loss)

        params = lasagne.layers.get_all_params(network, trainable=True)
        updates = rmsprop(loss, params, learning_rate=0.001)

        test_prediction = lasagne.layers.get_output(network,
                                                    deterministic=True)
        test_loss = squared_error(test_prediction, target_var)
        test_loss = aggregate(test_loss)

        # Compiling the graph by declaring the Theano functions
        compile_time = time()

        print('Data:')
        print('X_train ', X_train.shape, ' y_train ', y_train.shape)
        print('X_val ', X_val.shape, ' y_val ', y_val.shape)
        print('X_test ', X_test.shape, ' y_test ', y_test.shape)

        print("Compiling...")
        train_fn = theano.function([input_var, target_var],
                                   loss, updates=updates)
        val_fn = theano.function([input_var, target_var],
                                 test_loss)
        get_pred_fn = theano.function([input_var], prediction)
        print "Compiling time : ", time() - compile_time

        # For loop that goes each time through the hole training
        # and validation data
        # T R A I N I N G
        # - - - - - - - -
        print "Starting training...\n"
        for epoch in range(num_epochs):

            # Going over the training data
            train_err = 0
            train_batches = 0
            start_time = time()
            nb_batches = X_train.shape[0] // batchsize
            time_line = np.zeros(nb_batches)
            for batch in iterate_minibatches(X_train, y_train,
                                             batchsize, shuffle=True):
                current_time = time()
                inputs, targets = batch
                train_err += train_fn(inputs, targets)
                train_batches += 1
                str_out = "\rTrain Batch  " + str(train_batches)
                str_out += "/" + str(nb_batches)
                str_out += "  |  Loss : " + str(train_err / train_batches)[:7]
                str_out += "  |  Remaining time (s) : "
                remaining_seconds = time() - current_time
                remaining_seconds *= (nb_batches - train_batches)
                time_line[train_batches - 1] = round(remaining_seconds)
                if (train_batches - 1) % 5 == 0:
                    durations = time_line[train_batches-1: train_batches+50]
                    durations = np.mean([t for t in durations if t > 0])
                str_out += str(durations)
                sys.stdout.write(str_out)
                sys.stdout.flush()

            print "\nGoing through validation data"
            # Going over the validation data
            val_err = 0
            val_batches = 0
            for batch in iterate_minibatches(
                    X_val, y_val, batchsize, shuffle=False):
                inputs, targets = batch
                err = val_fn(inputs, targets)
                val_err += err
                val_batches += 1

            # Then we print the results for this epoch:
            # train_batches - 1 because started at 1 and not 0
            print "training loss:\t\t\t" + str(train_err / train_batches)
            print "validation loss:\t\t" + str(val_err / val_batches)
            print("Epoch {} of {} took {:.3f}s \n\n".format(
                epoch + 1, num_epochs, time() - start_time))

        # Now that the training is over, let's test the network:
        test_err = 0
        test_batches = 0
        for batch in iterate_minibatches(
                X_test, y_test, batchsize, shuffle=False):
            inputs, targets = batch
            err = val_fn(inputs, targets)
            test_err += err
            test_batches += 1
        print "\nFinal results in {0} seconds:".format(
            time()-global_start_time)
        print "Test loss:\t\t\t{:.6f}".format(test_err / test_batches)

        prediction_size = 200
        predicted = get_pred_fn(X_test[:prediction_size])

        try:
            plt.plot(predicted)
            plt.plot(y_test[:prediction_size])
            plt.show(block=False)
        except Exception as e:
            print(str(e))
            print("predicted = ", repr(
                np.reshape(predicted[:prediction_size], (prediction_size,))))
            print('\n')
            print("y = ", repr(
                np.reshape(y_test[:prediction_size], (prediction_size,))))
        return network
    except KeyboardInterrupt:
        return network
Example #39
def test_aggregate_sum():
    from lasagne.objectives import aggregate
    x = theano.tensor.matrix('x')
    assert theano.gof.graph.is_same_graph(aggregate(x, mode='sum'), x.sum())
Example #40
 def aggregated_loss_func(prediction, target, weights=None):
     loss = loss_func(prediction, target)
     return aggregate(loss, mode=loss_aggregation_mode, weights=weights)
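The snippet above closes over loss_func and loss_aggregation_mode from its enclosing scope. A hypothetical factory (assumed, not from the source) shows where those names could come from:

from lasagne.objectives import aggregate, squared_error

def make_aggregated_loss(loss_func=squared_error, loss_aggregation_mode='mean'):
    def aggregated_loss_func(prediction, target, weights=None):
        loss = loss_func(prediction, target)
        return aggregate(loss, mode=loss_aggregation_mode, weights=weights)
    return aggregated_loss_func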
Example #41
def do_regression(num_epochs=2, # No. of epochs to train
                  init_file=None,  # Saved parameters to initialise training
                  epoch_size=680780,  # Whole dataset size
                  valid_size=34848,
                  train_batch_multiple=10637,  # No. of minibatches per batch
                  valid_batch_multiple=1089,  # No. of minibatches per batch
                  train_minibatch_size=64, 
                  valid_minibatch_size=32,
                  eval_multiple=50,  # No. of minibatches to ave. in report
                  save_model=True,
                  input_width=19,
                  rng_seed=100009,
                  cross_val=0,  # Cross-validation subset label
                  dataver=1,  # Label for different runs/architectures/etc
                  rate_init=1.0,
                  rate_decay=0.999983):

    ###################################################
    ################# 0. User inputs ##################
    ###################################################
    for i in range(1,len(sys.argv)):
        if sys.argv[i].startswith('-'):
            option = sys.argv[i][1:]
            if option == 'i': init_file = sys.argv[i+1]
            elif option[0:2] == 'v=' : dataver = int(option[2:])
            elif option[0:3] == 'cv=' : cross_val = int(option[3:])
            elif option[0:3] == 'rs=' : rng_seed = int(option[3:])
            elif option[0:3] == 'ri=' : rate_init = np.float32(option[3:])
            elif option[0:3] == 'rd=' : rate_decay = np.float32(option[3:])
                                
    print("Running with dataver %s" % (dataver))
    print("Running with cross_val %s" % (cross_val))
    
    
    ###################################################
    ############# 1. Housekeeping values ##############
    ###################################################
    # Batch size is possibly not equal to epoch size due to memory limits
    train_batch_size = train_batch_multiple*train_minibatch_size 
    assert epoch_size >= train_batch_size
    
    # Number of times we expect the training/validation generator to be called
    max_train_gen_calls = (num_epochs*epoch_size)//train_batch_size 

    # Number of evaluations (total minibatches / eval_multiple)
    num_eval = max_train_gen_calls*train_batch_multiple // eval_multiple
    
    
    ###################################################
    ###### 2. Define model and theano variables #######
    ###################################################
    if rng_seed is not None:
        print("Setting RandomState with seed=%i" % (rng_seed))
        rng = np.random.RandomState(rng_seed)
        set_rng(rng)
    
    print("Defining variables...")
    index = T.lscalar() # Minibatch index
    x = T.tensor3('x') # Inputs 
    y = T.fvector('y') # Target
    
    print("Defining model...")
    network_0 = build_1Dregression_v1(
                        input_var=x,
                        input_width=input_width,
                        nin_units=12,
                        h_num_units=[64,128,256,128,64],
                        h_grad_clip=1.0,
                        output_width=1
                        )
                        
    if init_file is not None:
        print("Loading initial model parametrs...")
        init_model = np.load(init_file)
        init_params = init_model[init_model.files[0]]           
        LL.set_all_param_values([network_0], init_params)
        
    
    ###################################################                                
    ################ 3. Import data ###################
    ###################################################
    # Loading data generation model parameters
    print("Defining shared variables...")
    train_set_y = theano.shared(np.zeros(1, dtype=theano.config.floatX),
                                borrow=True) 
    train_set_x = theano.shared(np.zeros((1,1,1), dtype=theano.config.floatX),
                                borrow=True)
    
    valid_set_y = theano.shared(np.zeros(1, dtype=theano.config.floatX),
                                borrow=True)
    valid_set_x = theano.shared(np.zeros((1,1,1), dtype=theano.config.floatX),
                                borrow=True)
    
    # Validation data (pick a single augmented instance, rand0 here)
    print("Creating validation data...")    
    chunk_valid_data = np.load(
        "./valid/data_valid_augmented_cv%s_t%s_rand0.npy" 
        % (cross_val, input_width)
        ).astype(theano.config.floatX)
    chunk_valid_answers = np.load(
        "./valid/data_valid_expected_cv%s.npy" 
        % (cross_val)
        ).astype(theano.config.floatX)     
    
    print("chunk_valid_answers.shape", chunk_valid_answers.shape)
    print("Assigning validation data...")
    valid_set_y.set_value(chunk_valid_answers[:])
    valid_set_x.set_value(chunk_valid_data.transpose(0,2,1))
    
    # Create output directory
    if not os.path.exists("output_cv%s_v%s" % (cross_val, dataver)):
        os.makedirs("output_cv%s_v%s" % (cross_val, dataver))
    
    
    ###################################################                                
    ########### 4. Create Loss expressions ############
    ###################################################
    print("Defining loss expressions...")
    prediction_0 = LL.get_output(network_0) 
    train_loss = aggregate(T.abs_(prediction_0 - y.dimshuffle(0,'x')))
    
    valid_prediction_0 = LL.get_output(network_0, deterministic=True)
    valid_loss = aggregate(T.abs_(valid_prediction_0 - y.dimshuffle(0,'x')))
    
    
    ###################################################                                
    ############ 5. Define update method  #############
    ###################################################
    print("Defining update choices...")
    params = LL.get_all_params(network_0, trainable=True)
    learn_rate = T.scalar('learn_rate', dtype=theano.config.floatX)
    
    updates = lasagne.updates.adadelta(train_loss, params,
                                       learning_rate=learn_rate)
    
    
    ###################################################                                
    ######### 6. Define train/valid functions #########
    ###################################################    
    print("Defining theano functions...")
    train_model = theano.function(
        [index, learn_rate],
        train_loss,
        updates=updates,
        givens={
            x: train_set_x[(index*train_minibatch_size):
                            ((index+1)*train_minibatch_size)],
            y: train_set_y[(index*train_minibatch_size):
                            ((index+1)*train_minibatch_size)]  
        }
    )
    
    validate_model = theano.function(
        [index],
        valid_loss,
        givens={
            x: valid_set_x[index*valid_minibatch_size:
                            (index+1)*valid_minibatch_size],
            y: valid_set_y[index*valid_minibatch_size:
                            (index+1)*valid_minibatch_size]
        }
    )
    
    
    ###################################################                                
    ################ 7. Begin training ################
    ###################################################  
    print("Begin training...")
    sys.stdout.flush()
    
    cum_iterations = 0
    this_train_loss = 0.0
    this_valid_loss = 0.0
    best_valid_loss = np.inf
    best_iter = 0
    
    train_eval_scores = np.empty(num_eval)
    valid_eval_scores = np.empty(num_eval)
    eval_index = 0
    aug_index = 0
    
    for batch in range(max_train_gen_calls):
        start_time = time.time()        
        chunk_train_data = np.load(
            "./train/data_train_augmented_cv%s_t%s_rand%s.npy" %
            (cross_val, input_width, aug_index)
            ).astype(theano.config.floatX)
        chunk_train_answers = np.load(
            "./train/data_train_expected_cv%s.npy" % 
            (cross_val)
            ).astype(theano.config.floatX)     
            
        train_set_y.set_value(chunk_train_answers[:])
        train_set_x.set_value(chunk_train_data.transpose(0, 2, 1))
        
        # Iterate over minibatches in each batch
        for mini_index in range(train_batch_multiple):
            this_rate = np.float32(rate_init*(rate_decay**cum_iterations))
            this_train_loss += train_model(mini_index, this_rate)
            cum_iterations += 1
            
            # Report loss 
            if (cum_iterations % eval_multiple == 0):
                this_train_loss = this_train_loss / eval_multiple
                this_valid_loss = np.mean([validate_model(i) for
                                    i in range(valid_batch_multiple)])
                train_eval_scores[eval_index] = this_train_loss
                valid_eval_scores[eval_index] = this_valid_loss
                
                # Save report every five evaluations
                if ((eval_index+1) % 5 == 0):
                    np.savetxt(
                        "output_cv%s_v%s/training_scores.txt" %
                        (cross_val, dataver),
                         train_eval_scores, fmt="%.5f"
                         )
                    np.savetxt(
                        "output_cv%s_v%s/validation_scores.txt" %
                        (cross_val, dataver),
                         valid_eval_scores, fmt="%.5f"
                         )
                    np.savetxt(
                        "output_cv%s_v%s/last_learn_rate.txt" %
                        (cross_val, dataver),
                        [np.array(this_rate)], fmt="%.5f"
                        )
                
                # Save model if best validation score
                if (this_valid_loss < best_valid_loss):  
                    best_valid_loss = this_valid_loss
                    best_iter = cum_iterations-1
                    
                    if save_model:
                        np.savez("output_cv%s_v%s/model.npz" % 
                                 (cross_val, dataver),
                                 LL.get_all_param_values(network_0))
                    
                # Reset evaluation reports
                eval_index += 1
                this_train_loss = 0.0
                this_valid_loss = 0.0
                
        aug_index += 1
        
        end_time = time.time()
        print("Computing time for batch %d: %f" % (batch, end_time-start_time))
        
    print("Best validation loss %f after %d epochs" %
          (best_valid_loss, (best_iter*train_minibatch_size//epoch_size)))
    
    del train_set_x, train_set_y, valid_set_x, valid_set_y
    gc.collect()
    
    return None
Example #42
def run_mlp(train, val, num_epochs):
    # Partition Data
    train_rows, train_cols = train.shape
    train_rows, train_cols = train_rows, (train_cols - 1) 
    val_rows, val_cols = val.shape    
    val_rows, val_cols = val_rows, (val_cols-1)    
    
    X_train,y_train = train[0:train_rows ,0:train_cols],train[0:train_rows,train_cols:]
    X_val,y_val = val[0:val_rows,0:val_cols],val[0:val_rows,val_cols:]
    # Theano variables
    input_var = T.matrix('inputs')
    target_var = T.matrix('targets')
    
    network = build_mlp(input_var, train_cols, 1)
    
#    """loading weight values from the previous model"""
#    with np.load('model_first_run.npz') as f:
#        param_values = [f['arr_%d'%i] for i in range(len(f.files))]
#        param_values[0] = param_values[0][4:43]
#    lasagne.layers.set_all_param_values(network,param_values)
    
    prediction = lasagne.layers.get_output(network,input_var, deterministic = True)
    loss = lasagne.objectives.binary_crossentropy(prediction, target_var)
    loss = aggregate(loss, mode='mean')
    
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.sgd(loss, params, learning_rate=0.5)
    
    # Train Function
    train_fn = theano.function([input_var, target_var],loss, updates=updates)
    
    # Validation function
    val_fn = theano.function([input_var, target_var],loss) 
    
    # test function
    f_test = theano.function([input_var], prediction)    
    
    val_err_list, train_err_list = list(), list()
    print("Starting training...")

    for epoch in range(num_epochs):
        start_time = timeit.default_timer()        
        train_err = 0
        train_batches = 0
        #start_time = time.time()
        for batch in iterate_minibatches(X_train,y_train,100,shuffle=True):
            inputs,targets = batch
            #print (inputs.shape, targets.shape)
            batch_error = train_fn(inputs,targets)
            #print (list(f_test(inputs)))
            train_err += batch_error
            train_batches += 1
            #print (batch_error, train_batches)
            #if (train_batches%10==0):
                #print(train_batches)
                #print (batch_error)
        train_err_list.append(train_err)    
        
        
        val_err = 0
        val_batches = 0
        for batch in iterate_minibatches(X_val, y_val, 50, shuffle=False):
            inputs, targets = batch
            err = val_fn(inputs, targets)
            val_err += err
            val_batches += 1
            #print (err,val_batches)
        val_err_list.append(val_err)
        # Save model parameters after every epoch
        if (epoch % 1 == 0):
            file_name = 'model_epoch_' + str(epoch) + '.npz'
            np.savez(file_name, *lasagne.layers.get_all_param_values(network))
        print('Epoch:', epoch, '  train error:', train_err, '  val error:', val_err)
        train_err_line = ('Epoch:,' + str(epoch) + ", train_error:, " + str(train_err) +
                          ", val_error:, " + str(val_err) + "\n")
        file_result = 'C:\\Users\\Administrator\\Desktop\\Input_FUT\\NN_model\\' + str(train_dt) + '\\python runs'
        filepath = os.path.join(file_result, 'train_val_err.csv')
        with open(filepath, "a") as fout:
            fout.write(train_err_line)
        end_time = timeit.default_timer()
        print (end_time - start_time)
    return val_err_list,train_err_list   
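The run_mlp loop above relies on an iterate_minibatches helper that is not shown in this snippet. A minimal sketch of such a generator (a common pattern from the Lasagne examples; the name and the choice to drop the final partial batch are assumptions):

import numpy as np

def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    # Yield (inputs, targets) slices of size batchsize; the trailing partial batch is dropped.
    assert len(inputs) == len(targets)
    indices = np.arange(len(inputs))
    if shuffle:
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        excerpt = indices[start_idx:start_idx + batchsize]
        yield inputs[excerpt], targets[excerpt]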
def loss(prediction, target):
    return aggregate(categorical_crossentropy_logdomain(prediction,target))
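categorical_crossentropy_logdomain is not a Lasagne built-in and is not defined in this snippet. A minimal sketch of one common definition, assuming the network outputs log-probabilities (e.g. a log-softmax) and the targets are one-hot encoded:

import theano.tensor as T

def categorical_crossentropy_logdomain(log_predictions, targets):
    # Cross-entropy computed directly from log-probabilities, avoiding a separate log() call.
    return -T.sum(targets * log_predictions, axis=1)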
Beispiel #44
0
def get_network(model):

    input_data = tensor.dmatrix('x')
    targets_var = tensor.dmatrix('y')

    network = layers.InputLayer((model['batch_size'], model['input_vars']),
                                input_data)

    nonlin = nonlinearities.rectify
    if model['hidden_nonlinearity'] != 'ReLu':
        nonlin = nonlinearities.tanh

    prev_layer = network

    for l in range(model['nlayers']):
        W = None
        if model['hidden_nonlinearity'] == 'ReLu':
            W = lasagne.init.GlorotUniform('relu')
        else:
            W = lasagne.init.GlorotUniform(1)

        fc = layers.DenseLayer(prev_layer,
                               model['units'],
                               nonlinearity=nonlin,
                               W=W)
        if model['dropout']:
            fc = layers.DropoutLayer(fc, 0.5)
        prev_layer = fc

    output_lin = None
    if model['output_mode'] == OUTPUT_LOG:
        output_lin = nonlinearities.tanh
    output_layer = layers.DenseLayer(prev_layer, 1, nonlinearity=output_lin)

    predictions = layers.get_output(output_layer)

    if model['output_mode'] != OUTPUT_LOG:
        (minth, maxth) = model['maxmin'][model['control']]
        maxt = theano.shared(np.ones((model['batch_size'], 1)) * maxth)
        mint = theano.shared(np.ones((model['batch_size'], 1)) * minth)
        predictions = tensor.min(tensor.concatenate([maxt, predictions],
                                                    axis=1),
                                 axis=1)
        predictions = tensor.reshape(predictions, (model['batch_size'], 1))
        predictions = tensor.max(tensor.concatenate([mint, predictions],
                                                    axis=1),
                                 axis=1)
        predictions = tensor.reshape(predictions, (model['batch_size'], 1))

    if model['output_mode'] == OUTPUT_NO:
        prediction_unboun = layers.get_output(output_layer)
        loss = objectives.squared_error(prediction_unboun, targets_var)
    else:
        loss = objectives.squared_error(predictions, targets_var)

    loss = objectives.aggregate(loss, mode='mean')

    params = layers.get_all_params(output_layer)
    #    test_prediction = layers.get_output(output_layer, deterministic=True)  #fix for dropout
    test_loss = objectives.squared_error(predictions, targets_var)
    test_loss = test_loss.mean()

    if model['hidden_nonlinearity'] == 'ReLu':
        model['lr'] *= 0.5
    updates_sgd = updates.sgd(loss, params, learning_rate=model['lr'])
    ups = updates.apply_momentum(updates_sgd, params, momentum=0.9)

    train_fn = theano.function([input_data, targets_var], loss, updates=ups)
    pred_fn = theano.function([input_data], predictions)
    #    pred_fn = theano.function([input_data], prediction_unboun)
    val_fn = theano.function([input_data, targets_var], test_loss)

    return {
        'train': train_fn,
        'eval': val_fn,
        'pred': pred_fn,
        'layers': output_layer
    }
Beispiel #45
0
    def build_model(self, train_set, test_set, validation_set=None):
        super(UFCNN, self).build_model(train_set, test_set, validation_set)

        epsilon = 1e-8
        loss_cc = aggregate(categorical_crossentropy(
            T.clip(get_output(self.model, self.sym_x), epsilon, 1),
            self.sym_t),
                            mode='mean')

        y = T.clip(get_output(self.model, self.sym_x, deterministic=True),
                   epsilon, 1)
        loss_eval = aggregate(categorical_crossentropy(y, self.sym_t),
                              mode='mean')
        loss_acc = categorical_accuracy(y, self.sym_t).mean()

        all_params = get_all_params(self.model, trainable=True)
        grads = T.grad(loss_cc, all_params)
        for idx, param in enumerate(all_params):
            param_name = param.name
            if ('h2.W' in param_name) or ('g2.W' in param_name):
                print(param_name)
                grads[idx] *= self.l2_mask
            if ('h3.W' in param_name) or ('g3.W' in param_name):
                print(param_name)
                grads[idx] *= self.l3_mask

        sym_beta1 = T.scalar('beta1')
        sym_beta2 = T.scalar('beta2')
        updates = adam(grads, all_params, self.sym_lr, sym_beta1, sym_beta2)

        inputs = [
            self.sym_index, self.sym_batchsize, self.sym_lr, sym_beta1,
            sym_beta2
        ]
        f_train = theano.function(
            inputs,
            [loss_cc],
            updates=updates,
            givens={
                self.sym_x: self.sh_train_x[self.batch_slice],
                self.sym_t: self.sh_train_t[self.batch_slice],
            },
        )

        f_test = theano.function(
            [self.sym_index, self.sym_batchsize],
            [loss_eval],
            givens={
                self.sym_x: self.sh_test_x[self.batch_slice],
                self.sym_t: self.sh_test_t[self.batch_slice],
            },
        )

        f_validate = None
        if validation_set is not None:
            f_validate = theano.function(
                [self.sym_index, self.sym_batchsize],
                [loss_eval, loss_acc],
                givens={
                    self.sym_x: self.sh_valid_x[self.batch_slice],
                    self.sym_t: self.sh_valid_t[self.batch_slice],
                },
            )

        self.train_args['inputs']['batchsize'] = 128
        self.train_args['inputs']['learningrate'] = 1e-3
        self.train_args['inputs']['beta1'] = 0.9
        self.train_args['inputs']['beta2'] = 0.999
        self.train_args['outputs']['loss_cc'] = '%0.6f'

        self.test_args['inputs']['batchsize'] = 128
        self.test_args['outputs']['loss_eval'] = '%0.6f'

        self.validate_args['inputs']['batchsize'] = 128
        self.validate_args['outputs']['loss_eval'] = '%0.6f'
        self.validate_args['outputs']['loss_acc'] = '%0.6f%%'
        return f_train, f_test, f_validate, self.train_args, self.test_args, self.validate_args
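The grads[idx] *= self.l2_mask / self.l3_mask lines above zero out selected entries of the gradient so that the corresponding parts of h2.W/g2.W and h3.W/g3.W are never updated. The masks themselves are built elsewhere; a minimal sketch of such a mask (shape and frozen region are assumptions, not the original values):

import numpy as np
import theano

W_shape = (64, 128)                                   # hypothetical shape of h2.W
mask = np.ones(W_shape, dtype=theano.config.floatX)
mask[:, 64:] = 0.0                                    # e.g. freeze the right half of the matrix
l2_mask = theano.shared(mask, name='l2_mask')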
    def build_model(self, train_set, test_set, validation_set=None):
        super(FCAE, self).build_model(train_set, test_set, validation_set)

        y_train = get_output(self.model, self.sym_x)
        loss = aggregate(squared_error(y_train, self.sym_x), mode='mean')
        # loss += + 1e-4 * lasagne.regularization.regularize_network_params(self.model, lasagne.regularization.l2)

        y_test = get_output(self.model, self.sym_x, deterministic=True)
        loss_test = aggregate(squared_error(y_test, self.sym_x), mode='mean')

        all_params = get_all_params(self.model, trainable=True)
        sym_beta1 = T.scalar('beta1')
        sym_beta2 = T.scalar('beta2')
        grads = T.grad(loss, all_params)
        ngrads = lasagne.updates.total_norm_constraint(grads, 5)
        cgrads = [T.clip(g, -5, 5) for g in ngrads]
        updates = rmsprop(cgrads, all_params, self.sym_lr, sym_beta1,
                          sym_beta2)

        inputs = [
            self.sym_index, self.sym_batchsize, self.sym_lr, sym_beta1,
            sym_beta2
        ]
        f_train = theano.function(
            inputs,
            [loss],
            updates=updates,
            givens={
                self.sym_x: self.sh_train_x[self.batch_slice],
            },
        )

        f_test = theano.function(
            [self.sym_index, self.sym_batchsize],
            [loss_test],
            givens={
                self.sym_x: self.sh_test_x[self.batch_slice],
            },
            on_unused_input='ignore',
        )

        f_ae = None
        # f_ae = theano.function(
        #     [self.sym_batchsize], [y_test],
        #     givens={
        #         self.sym_x: self.sh_valid_x,
        #     },
        #     on_unused_input='ignore',
        # )

        self.train_args['inputs']['batchsize'] = 128
        self.train_args['inputs']['learningrate'] = 1e-3
        self.train_args['inputs']['beta1'] = 0.9
        self.train_args['inputs']['beta2'] = 1e-6
        self.train_args['outputs']['loss'] = '%0.6f'

        self.test_args['inputs']['batchsize'] = 128
        self.test_args['outputs']['loss_test'] = '%0.6f'

        # self.validate_args['inputs']['batchsize'] = 128
        # self.validate_args['outputs']['loss_eval'] = '%0.6f'
        # self.validate_args['outputs']['loss_acc'] = '%0.6f'
        return f_train, f_test, f_ae, self.train_args, self.test_args, self.validate_args
Beispiel #47
0
    def msq_err(self, train_output, target_values):

        loss = squared_error(train_output, target_values)
        loss = aggregate(loss, mode='mean')

        return loss
def do_regression(
        num_epochs=60,  # No. of epochs to train
        init_file=None,  # Saved parameters to initialise training
        epoch_size=680780,  # Whole dataset size
        valid_size=34848,
        train_batch_multiple=10637,  # No. of minibatches per batch
        valid_batch_multiple=1089,  # No. of minibatches per batch
        train_minibatch_size=64,
        valid_minibatch_size=32,
        eval_multiple=50,  # No. of minibatches to ave. in report
        save_model=True,
        input_width=19,
        rng_seed=100009,
        cross_val=0,  # Cross-validation subset label
        dataver=1,  # Label for different runs/architectures/etc
        rate_init=1.0,
        rate_decay=0.999983):

    ###################################################
    ################# 0. User inputs ##################
    ###################################################
    for i in range(1, len(sys.argv)):
        if sys.argv[i].startswith('-'):
            option = sys.argv[i][1:]
            if option == 'i': init_file = sys.argv[i + 1]
            elif option[0:2] == 'v=': dataver = int(option[2:])
            elif option[0:3] == 'cv=': cross_val = int(option[3:])
            elif option[0:3] == 'rs=': rng_seed = int(option[3:])
            elif option[0:3] == 'ri=': rate_init = np.float32(option[3:])
            elif option[0:3] == 'rd=': rate_decay = np.float32(option[3:])

    print("Running with dataver %s" % (dataver))
    print("Running with cross_val %s" % (cross_val))

    ###################################################
    ############# 1. Housekeeping values ##############
    ###################################################
    # Batch size is possibly not equal to epoch size due to memory limits
    train_batch_size = train_batch_multiple * train_minibatch_size
    assert epoch_size >= train_batch_size

    # Number of times we expect the training/validation generator to be called
    max_train_gen_calls = (num_epochs * epoch_size) // train_batch_size

    # Number of evaluations (total minibatches / eval_multiple)
    num_eval = (max_train_gen_calls * train_batch_multiple) // eval_multiple

    ###################################################
    ###### 2. Define model and theano variables #######
    ###################################################
    if rng_seed is not None:
        print("Setting RandomState with seed=%i" % (rng_seed))
        rng = np.random.RandomState(rng_seed)
        set_rng(rng)

    print("Defining variables...")
    index = T.lscalar()  # Minibatch index
    x = T.tensor3('x')  # Inputs
    y = T.fvector('y')  # Target

    print("Defining model...")
    network_0 = build_1Dregression_v1(input_var=x,
                                      input_width=input_width,
                                      nin_units=12,
                                      h_num_units=[64, 128, 256, 128, 64],
                                      h_grad_clip=1.0,
                                      output_width=1)

    if init_file is not None:
        print("Loading initial model parametrs...")
        init_model = np.load(init_file)
        init_params = init_model[init_model.files[0]]
        LL.set_all_param_values([network_0], init_params)

    ###################################################
    ################ 3. Import data ###################
    ###################################################
    # Loading data generation model parameters
    print("Defining shared variables...")
    train_set_y = theano.shared(np.zeros(1, dtype=theano.config.floatX),
                                borrow=True)
    train_set_x = theano.shared(np.zeros((1, 1, 1),
                                         dtype=theano.config.floatX),
                                borrow=True)

    valid_set_y = theano.shared(np.zeros(1, dtype=theano.config.floatX),
                                borrow=True)
    valid_set_x = theano.shared(np.zeros((1, 1, 1),
                                         dtype=theano.config.floatX),
                                borrow=True)

    # Validation data (pick a single augmented instance, rand0 here)
    print("Creating validation data...")
    chunk_valid_data = np.load(
        "./valid/data_valid_augmented_cv%s_t%s_rand0.npy" %
        (cross_val, input_width)).astype(theano.config.floatX)
    chunk_valid_answers = np.load("./valid/data_valid_expected_cv%s.npy" %
                                  (cross_val)).astype(theano.config.floatX)

    print "chunk_valid_answers.shape", chunk_valid_answers.shape
    print("Assigning validation data...")
    valid_set_y.set_value(chunk_valid_answers[:])
    valid_set_x.set_value(chunk_valid_data.transpose(0, 2, 1))

    # Create output directory
    if not os.path.exists("output_cv%s_v%s" % (cross_val, dataver)):
        os.makedirs("output_cv%s_v%s" % (cross_val, dataver))

    ###################################################
    ########### 4. Create Loss expressions ############
    ###################################################
    print("Defining loss expressions...")
    prediction_0 = LL.get_output(network_0)
    train_loss = aggregate(T.abs_(prediction_0 - y.dimshuffle(0, 'x')))

    valid_prediction_0 = LL.get_output(network_0, deterministic=True)
    valid_loss = aggregate(T.abs_(valid_prediction_0 - y.dimshuffle(0, 'x')))

    ###################################################
    ############ 5. Define update method  #############
    ###################################################
    print("Defining update choices...")
    params = LL.get_all_params(network_0, trainable=True)
    learn_rate = T.scalar('learn_rate', dtype=theano.config.floatX)

    updates = lasagne.updates.adadelta(train_loss,
                                       params,
                                       learning_rate=learn_rate)

    ###################################################
    ######### 6. Define train/valid functions #########
    ###################################################
    print("Defining theano functions...")
    train_model = theano.function(
        [index, learn_rate],
        train_loss,
        updates=updates,
        givens={
            x:
            train_set_x[(index * train_minibatch_size):((index + 1) *
                                                        train_minibatch_size)],
            y:
            train_set_y[(index * train_minibatch_size):((index + 1) *
                                                        train_minibatch_size)]
        })

    validate_model = theano.function(
        [index],
        valid_loss,
        givens={
            x:
            valid_set_x[index * valid_minibatch_size:(index + 1) *
                        valid_minibatch_size],
            y:
            valid_set_y[index * valid_minibatch_size:(index + 1) *
                        valid_minibatch_size]
        })

    ###################################################
    ################ 7. Begin training ################
    ###################################################
    print("Begin training...")
    sys.stdout.flush()

    cum_iterations = 0
    this_train_loss = 0.0
    this_valid_loss = 0.0
    best_valid_loss = np.inf
    best_iter = 0

    train_eval_scores = np.empty(num_eval)
    valid_eval_scores = np.empty(num_eval)
    eval_index = 0
    aug_index = 0

    for batch in xrange(max_train_gen_calls):
        start_time = time.time()
        chunk_train_data = np.load(
            "./train/data_train_augmented_cv%s_t%s_rand%s.npy" %
            (cross_val, input_width, aug_index)).astype(theano.config.floatX)
        chunk_train_answers = np.load("./train/data_train_expected_cv%s.npy" %
                                      (cross_val)).astype(theano.config.floatX)

        train_set_y.set_value(chunk_train_answers[:])
        train_set_x.set_value(chunk_train_data.transpose(0, 2, 1))

        # Iterate over minibatches in each batch
        for mini_index in xrange(train_batch_multiple):
            this_rate = np.float32(rate_init * (rate_decay**cum_iterations))
            this_train_loss += train_model(mini_index, this_rate)
            cum_iterations += 1

            # Report loss
            if (cum_iterations % eval_multiple == 0):
                this_train_loss = this_train_loss / eval_multiple
                this_valid_loss = np.mean(
                    [validate_model(i) for i in xrange(valid_batch_multiple)])
                train_eval_scores[eval_index] = this_train_loss
                valid_eval_scores[eval_index] = this_valid_loss

                # Save report every five evaluations
                if ((eval_index + 1) % 5 == 0):
                    np.savetxt("output_cv%s_v%s/training_scores.txt" %
                               (cross_val, dataver),
                               train_eval_scores,
                               fmt="%.5f")
                    np.savetxt("output_cv%s_v%s/validation_scores.txt" %
                               (cross_val, dataver),
                               valid_eval_scores,
                               fmt="%.5f")
                    np.savetxt("output_cv%s_v%s/last_learn_rate.txt" %
                               (cross_val, dataver), [np.array(this_rate)],
                               fmt="%.5f")

                # Save model if best validation score
                if (this_valid_loss < best_valid_loss):
                    best_valid_loss = this_valid_loss
                    best_iter = cum_iterations - 1

                    if save_model:
                        np.savez(
                            "output_cv%s_v%s/model.npz" % (cross_val, dataver),
                            LL.get_all_param_values(network_0))

                # Reset evaluation reports
                eval_index += 1
                this_train_loss = 0.0
                this_valid_loss = 0.0

        aug_index += 1

        end_time = time.time()
        print("Computing time for batch %d: %f" %
              (batch, end_time - start_time))

    print("Best validation loss %f after %d epochs" %
          (best_valid_loss, (best_iter * train_minibatch_size // epoch_size)))

    del train_set_x, train_set_y, valid_set_x, valid_set_y
    gc.collect()

    return None
Beispiel #49
0
def test_aggregate_weighted_normalized_sum():
    from lasagne.objectives import aggregate
    x = theano.tensor.matrix('x')
    w = theano.tensor.matrix('w')
    assert theano.gof.graph.is_same_graph(aggregate(x, w, 'normalized_sum'),
                                          (x * w).sum() / w.sum())
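A quick numeric check of the identity exercised by this test, assuming a standard Theano/Lasagne install:

import numpy as np
import theano
import theano.tensor as T
from lasagne.objectives import aggregate

x = T.matrix('x')
w = T.matrix('w')
f = theano.function([x, w], aggregate(x, w, mode='normalized_sum'))

xv = np.array([[1, 2], [3, 4]], dtype=theano.config.floatX)
wv = np.array([[1, 0], [1, 1]], dtype=theano.config.floatX)
print(f(xv, wv))  # (1*1 + 2*0 + 3*1 + 4*1) / (1 + 0 + 1 + 1) = 8/3 ~ 2.667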
Beispiel #50
0
    def build_model(self,
                    train_set,
                    test_set,
                    validation_set=None,
                    weights=None):
        super(BRNN, self).build_model(train_set, test_set, validation_set)

        def brier_score(given, predicted, weight_vector, mask):
            return T.mean(
                T.power(given - predicted, 2.0).dot(weight_vector) * mask)

        epsilon = 1e-8
        mask = get_output(self.mask, self.sym_x)
        y_train = T.clip(get_output(self.model, self.sym_x), epsilon, 1)
        train_brier = brier_score(y_train, self.sym_t, weights, mask)
        train_cc = aggregate(categorical_crossentropy(y_train, self.sym_t),
                             mode='mean')
        loss_train_acc = categorical_accuracy(y_train, self.sym_t).mean()

        y_test = T.clip(get_output(self.model, self.sym_x, deterministic=True),
                        epsilon, 1)
        test_brier = brier_score(y_test, self.sym_t, weights, mask)
        test_cc = aggregate(categorical_crossentropy(y_test, self.sym_t),
                            mode='mean')
        test_acc = categorical_accuracy(y_test, self.sym_t).mean()

        all_params = get_all_params(self.model, trainable=True)
        sym_beta1 = T.scalar('beta1')
        sym_beta2 = T.scalar('beta2')
        grads = T.grad(train_brier, all_params)
        grads = [T.clip(g, -1, 1) for g in grads]
        updates = adam(grads, all_params, self.sym_lr, sym_beta1, sym_beta2)

        inputs = [
            self.sym_index, self.sym_batchsize, self.sym_lr, sym_beta1,
            sym_beta2
        ]
        f_train = theano.function(
            inputs,
            [train_cc, train_brier],
            updates=updates,
            givens={
                self.sym_x: self.sh_train_x[self.batch_slice],
                self.sym_t: self.sh_train_t[self.batch_slice],
            },
        )

        f_test = theano.function(
            [],
            [test_cc, test_brier],
            givens={
                self.sym_x: self.sh_test_x,
                self.sym_t: self.sh_test_t,
            },
        )

        f_validate = None
        if validation_set is not None:
            f_validate = theano.function(
                [self.sym_index, self.sym_batchsize],
                [test_cc, test_acc],
                givens={
                    self.sym_x: self.sh_valid_x[self.batch_slice],
                    self.sym_t: self.sh_valid_t[self.batch_slice],
                },
            )

        predict = theano.function([self.sym_x], [y_test])

        self.train_args['inputs']['batchsize'] = 64
        self.train_args['inputs']['learningrate'] = 1e-3
        self.train_args['inputs']['beta1'] = 0.9
        self.train_args['inputs']['beta2'] = 0.999  # 1e-6
        self.train_args['outputs']['train_cc'] = '%0.4f'
        # self.train_args['outputs']['train_acc'] = '%0.4f'
        self.train_args['outputs']['train_brier'] = '%0.4f'

        # self.test_args['inputs']['batchsize'] = 64
        self.test_args['outputs']['test_cc'] = '%0.4f'
        # self.test_args['outputs']['test_acc'] = '%0.4f'
        self.test_args['outputs']['test_brier'] = '%0.4f'

        # self.validate_args['inputs']['batchsize'] = 64
        # self.validate_args['outputs']['loss_eval'] = '%0.6f'
        # self.validate_args['outputs']['test_acc'] = '%0.6f'
        return f_train, f_test, f_validate, self.train_args, self.test_args, self.validate_args, predict
Beispiel #51
0
    def __init__(self, hidden_size=100, nclasses=73, num_embeddings=11359, embedding_dim=100, window_size=1,
                 memory_size=40, n_memory_slots=8, go_code=1, depth=2, load_dir=None):

        articles, titles = T.imatrices('articles', 'titles')
        n_article_slots = int(n_memory_slots / 2)  # TODO derive this from an arg
        n_title_slots = n_memory_slots - n_article_slots
        n_instances = articles.shape[0]

        self.window_size = window_size

        randoms = {
            # attr: shape
            # 'emb': (num_embeddings + 1, embedding_dim),
            'M_a': (memory_size, n_article_slots),
            'M_t': (memory_size, n_title_slots),
            'w_a': (n_article_slots,),
            'w_t': (n_title_slots,),
            'Wg_a': (window_size * embedding_dim, n_article_slots),
            'Wg_t': (window_size * embedding_dim, n_title_slots),
            'Wk': (hidden_size, memory_size),
            'Wb': (hidden_size, 1),
            'Wv': (hidden_size, memory_size),
            'We_a': (hidden_size, n_article_slots),
            'We_t': (hidden_size, n_title_slots),
            'Wx': (window_size * embedding_dim, hidden_size),
            'Wh': (memory_size, hidden_size),
            'W': (hidden_size, nclasses),
            'h0': hidden_size
        }

        zeros = {
            # attr: shape
            'bg_a': n_article_slots,
            'bg_t': n_title_slots,
            'bk': memory_size,
            'bb': 1,
            'bv': memory_size,
            'be_a': n_article_slots,
            'be_t': n_title_slots,
            'bh': hidden_size,
            'b': nclasses,
        }

        for l in range(depth):
            randoms['gru' + str(l)] = (1, embedding_dim)

        def random_shared(name):
            shape = randoms[name]
            return theano.shared(
                0.2 * np.random.normal(size=shape).astype(theano.config.floatX),
                name=name)

        def zeros_shared(name):
            shape = zeros[name]
            return theano.shared(np.zeros(shape, dtype=theano.config.floatX), name=name)

        for key in randoms:
            # create an attribute with associated shape and random values
            setattr(self, key, random_shared(key))

        for key in zeros:
            # create an attribute with associated shape and values equal to 0
            setattr(self, key, zeros_shared(key))

        self.names = randoms.keys() + zeros.keys()
        # self.names.remove('emb')  # no need to save or update embeddings
        scan_vars = 'h0 w_a M_a w_t M_t'.split()

        def repeat_for_each_instance(param):
            """ repeat param along new axis once for each instance """
            return T.repeat(T.shape_padleft(param), repeats=n_instances, axis=0)

        for key in scan_vars:
            setattr(self, key, repeat_for_each_instance(self.__getattribute__(key)))
            self.names.remove(key)

        if load_dir is not None:
            with open(os.path.join(load_dir, 'params.pkl')) as handle:
                params = pickle.load(handle)
                self.__dict__.update(params)

        def recurrence(i,
                       h_tm1,
                       w_a,
                       M_a,
                       *args,
                       **kwargs):
            """
            Notes:
            Headers from the paper are in all caps.
            mem = n_article_slots if is_article else n_title_slots

            :param i: center index of sliding window
            :param h_tm1: h_{t-1} (hidden state)
            :param w_a: attention weights for article memory
            :param M_a: article memory
            :param args: gru_weights, maybe w_t, maybe M_t
                   gru_weights: weights with which to initialize GRULayer on each time step
                   w_t: attention weights for titles memory
                   M_t: titles memory
            :param kwargs: is_training, is_article
                   is_training: if True, the index simply advances (i + 1, teacher forcing);
                                otherwise the model's own prediction is fed back as the next index
                   is_article: we use different parts of memory when working with an article
            :return: [y = model outputs,
                      y_max = predicted token indices,
                      next_idxs = next index,
                      h, w, M (see above)]
            """
            is_training = kwargs['is_training']
            is_article = kwargs['is_article']
            gru_weights = args[:depth]
            if len(args) > depth:
                w_t = args[depth]
                M_t = args[depth + 1]

            i_type = T.iscalar if is_article or is_training else T.ivector
            assert i.type == i_type

            if not is_article:
                assert w_t is not None and M_t is not None

            word_idxs = i
            if is_article or is_training:
                # get representation of word window
                document = articles if is_article else titles  # [instances, bucket_width]
                word_idxs = document[:, i:i+1]  # [instances, 1]
            # x_i = self.emb[word_idxs].flatten(ndim=2)  # [instances, embedding_dim]

            input = InputLayer(shape=(None, 1),
                               input_var=word_idxs)
            embed = EmbeddingLayer(input, num_embeddings, embedding_dim)
            gru = GRULayer(incoming=embed, num_units=embedding_dim, hid_init=self.gru0)
            for weight in gru_weights:
                gru = GRULayer(incoming=gru, num_units=embedding_dim,
                               hid_init=weight)
            x_i = get_output(gru).flatten(ndim=2)
            x_i = Print('x_i')(x_i)  # [instances, embedding_dim]

            gru_weights = []

            if is_article:
                M_read = M_a  # [instances, memory_size, n_article_slots]
                w_read = w_a  # [instances, n_article_slots]
            else:
                M_read = T.concatenate([M_a, M_t], axis=2)  # [instances, memory_size, n_title_slots]
                w_read = T.concatenate([w_a, w_t], axis=1)  # [instances, n_title_slots]

            # eqn 15
            c = T.batched_dot(M_read, w_read)  # [instances, memory_size]

            # EXTERNAL MEMORY READ
            def get_attention(Wg, bg, M, w):
                g = T.nnet.sigmoid(T.dot(x_i, Wg) + bg)  # [instances, mem]

                # eqn 11
                k = T.dot(h_tm1, self.Wk) + self.bk  # [instances, memory_size]

                # eqn 13
                beta = T.dot(h_tm1, self.Wb) + self.bb
                beta = T.nnet.softplus(beta)
                beta = T.addbroadcast(beta, 1)  # [instances, 1]

                # eqn 12
                w_hat = T.nnet.softmax(beta * cosine_dist(M, k))

                # eqn 14
                return (1 - g) * w + g * w_hat  # [instances, mem]

            w_a = get_attention(self.Wg_a, self.bg_a, M_a, w_a)  # [instances, n_article_slots]
            if not is_article:
                w_t = get_attention(self.Wg_t, self.bg_t, M_t, w_t)  # [instances, n_title_slots]

            # MODEL INPUT AND OUTPUT
            # eqn 9
            h = T.dot(c, self.Wh) + T.dot(x_i, self.Wx) + self.bh  # [instances, hidden_size]

            # eqn 10
            y = T.nnet.softmax(T.dot(h, self.W) + self.b)  # [instances, nclasses]

            # EXTERNAL MEMORY UPDATE
            def update_memory(We, be, w_update, M_update):
                # eqn 17
                e = T.nnet.sigmoid(T.dot(h_tm1, We) + be)  # [instances, mem]
                f = 1. - w_update * e  # [instances, mem]

                # eqn 16
                v = T.tanh(T.dot(h, self.Wv) + self.bv)  # [instances, memory_size]

                # need to add broadcast layers for memory update
                f = f.dimshuffle(0, 'x', 1)  # [instances, 1, mem]
                u = w_update.dimshuffle(0, 'x', 1)  # [instances, 1, mem]
                v = v.dimshuffle(0, 1, 'x')  # [instances, memory_size, 1]

                # eqn 19
                return M_update * f + T.batched_dot(v, u) * (1 - f)  # [instances, memory_size, mem]

            M_a = update_memory(self.We_a, self.be_a, w_a, M_a)
            attention_and_memory = [w_a, M_a]
            if not is_article:
                M_t = update_memory(self.We_t, self.be_t, w_t, M_t)
                attention_and_memory += [w_t, M_t]

            y_max = y.argmax(axis=1).astype(int32)
            next_idxs = i + 1 if is_training or is_article else y_max
            return [y, y_max, next_idxs, h] + attention_and_memory

        read_article = partial(recurrence, is_training=True, is_article=True)
        # for read_article, it actually doesn't matter whether is_training is true

        i0 = T.constant(0, dtype=int32, name='first_value_of_i')
        gru_weights = [getattr(self, 'gru' + str(l)) for l in range(depth)]
        outputs_info = [None, None, i0, self.h0, self.w_a, self.M_a] + gru_weights

        [_, _, _, h, w, M], _ = theano.scan(fn=read_article,
                                            outputs_info=outputs_info,
                                            n_steps=articles.shape[1],
                                            name='read_scan')

        produce_title = partial(recurrence, is_training=True, is_article=False)
        outputs_info[3:6] = [param[-1, :, :] for param in (h, w, M)]
        outputs_info.extend([self.w_t, self.M_t])
        bucket_width = titles.shape[1] - 1  # subtract 1 because <go> is omitted in y_true
        [y, y_max, _, _, _, _, _, _], _ = theano.scan(fn=produce_title,
                                                      outputs_info=outputs_info,
                                                      n_steps=bucket_width,
                                                      name='train_scan')

        # loss and updates
        y_clip = T.clip(y, .01, .99)
        y_flatten = y_clip.dimshuffle(2, 1, 0).flatten(ndim=2).T
        y_true = titles[:, 1:].ravel()  # [:, 1:] in order to omit <go>
        counts = T.extra_ops.bincount(y_true, assert_nonneg=True)
        weights = 1.0 / (counts[y_true] + 1) * T.neq(y_true, 0)
        losses = T.nnet.categorical_crossentropy(y_flatten, y_true)
        loss = objectives.aggregate(losses, weights, mode='sum')
        updates = adadelta(loss, self.params())

        self.learn = theano.function(inputs=[articles, titles],
                                     outputs=[y_max.T, loss],
                                     updates=updates,
                                     allow_input_downcast=True,
                                     name='learn')

        produce_title_test = partial(recurrence, is_training=False, is_article=False)

        self.test = theano.function(inputs=[articles, titles],
                                    outputs=[y_max.T],
                                    on_unused_input='ignore')

        outputs_info[2] = T.zeros([n_instances], dtype=int32) + go_code
        [_, y_max, _, _, _, _, _, _], _ = theano.scan(fn=produce_title_test,
                                                      outputs_info=outputs_info,
                                                      n_steps=bucket_width,
                                                      name='test_scan')

        self.predict = theano.function(inputs=[articles, titles],
                                       outputs=y_max.T,
                                       name='infer')
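cosine_dist, used in the attention step (eqn 12) above, is an external helper that is not defined in this snippet. A minimal sketch of such a function, assuming M has shape [instances, memory_size, slots] and k has shape [instances, memory_size]:

import theano.tensor as T

def cosine_dist(M, k):
    # Cosine similarity between the key k and every memory slot of M -> [instances, slots].
    eps = 1e-8
    M_norm = T.sqrt(T.sum(T.sqr(M), axis=1)) + eps        # [instances, slots]
    k_norm = T.sqrt(T.sum(T.sqr(k), axis=1)) + eps        # [instances]
    dots = T.batched_dot(k.dimshuffle(0, 'x', 1), M)      # [instances, 1, slots]
    return dots.flatten(ndim=2) / (M_norm * k_norm.dimshuffle(0, 'x'))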
    def build_model(self, train_set, test_set, validation_set=None):
        super(CNN, self).build_model(train_set, test_set, validation_set)

        epsilon = 1e-8
        y_train = T.clip(get_output(self.model, self.sym_x), epsilon, 1)
        loss_cc = aggregate(categorical_crossentropy(y_train, self.sym_t),
                            mode='mean')
        loss_train_acc = categorical_accuracy(y_train, self.sym_t).mean()

        y = T.clip(get_output(self.model, self.sym_x, deterministic=True),
                   epsilon, 1)
        loss_eval = aggregate(categorical_crossentropy(y, self.sym_t),
                              mode='mean')
        loss_acc = categorical_accuracy(y, self.sym_t).mean()

        all_params = get_all_params(self.model, trainable=True)
        sym_beta1 = T.scalar('beta1')
        sym_beta2 = T.scalar('beta2')
        grads = T.grad(loss_cc, all_params)
        grads = [T.clip(g, -5, 5) for g in grads]
        updates = rmsprop(grads, all_params, self.sym_lr, sym_beta1, sym_beta2)

        inputs = [
            self.sym_index, self.sym_batchsize, self.sym_lr, sym_beta1,
            sym_beta2
        ]
        f_train = theano.function(
            inputs,
            [loss_cc, loss_train_acc],
            updates=updates,
            givens={
                self.sym_x: self.sh_train_x[self.batch_slice],
                self.sym_t: self.sh_train_t[self.batch_slice],
            },
        )

        f_test = theano.function(
            [self.sym_index, self.sym_batchsize],
            [loss_eval, loss_acc],
            givens={
                self.sym_x: self.sh_test_x[self.batch_slice],
                self.sym_t: self.sh_test_t[self.batch_slice],
            },
        )

        f_validate = None
        if validation_set is not None:
            f_validate = theano.function(
                [self.sym_index, self.sym_batchsize],
                [loss_eval, loss_acc],
                givens={
                    self.sym_x: self.sh_valid_x[self.batch_slice],
                    self.sym_t: self.sh_valid_t[self.batch_slice],
                },
            )

        self.train_args['inputs']['batchsize'] = 128
        self.train_args['inputs']['learningrate'] = 1e-3
        self.train_args['inputs']['beta1'] = 0.9
        self.train_args['inputs']['beta2'] = 0.999
        self.train_args['outputs']['loss_cc'] = '%0.6f'
        self.train_args['outputs']['loss_train_acc'] = '%0.6f'

        self.test_args['inputs']['batchsize'] = 128
        self.test_args['outputs']['loss_eval'] = '%0.6f'
        self.test_args['outputs']['loss_acc'] = '%0.6f'

        self.validate_args['inputs']['batchsize'] = 128
        # self.validate_args['outputs']['loss_eval'] = '%0.6f'
        # self.validate_args['outputs']['loss_acc'] = '%0.6f'
        return f_train, f_test, f_validate, self.train_args, self.test_args, self.validate_args
def build_update_functions(train_set_x, train_set_y,
                           valid_set_x, valid_set_y,
                           network,
                           y, X,
                           train_MASK, val_MASK,
                           batch_size=32,
                           l2_reg=.0001):

    # build update functions
    # extract tensor representing the network predictions
    prediction = get_output(network)

    ###################New#########################
    # Aggregate the element-wise error into a scalar value using a mask
    # Note that y should not contain NaN values; replace them with 0 or -1. The replacement value
    # does not matter, since masked entries do not contribute to the aggregated error or the update.
    # MASK should be a matrix of size(y), with 0s in place of NaN values and 1s everywhere else.

    # build tensor variable for mask
    trainMASK = T.matrix('trainMASK')
    # collect squared error
    loss_RMSE = squared_error(prediction, y)
    # Drop nan values and average over the remaining values
    loss_RMSE = aggregate(loss_RMSE, weights=trainMASK, mode='normalized_sum')
    # compute the square root
    loss_RMSE = loss_RMSE.sqrt()
    ###############################################
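    # A minimal NumPy sketch (hypothetical; y_raw denotes the raw target array) of how such a
    # mask can be prepared before calling this function:
    #     mask = (~np.isnan(y_raw)).astype(theano.config.floatX)   # 1 where a label exists
    #     y_clean = np.where(np.isnan(y_raw), 0.0, y_raw).astype(theano.config.floatX)
    # y_clean is then passed as the targets and mask as train_MASK / val_MASK, so masked-out
    # entries contribute nothing and normalized_sum divides by the number of valid labels.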

    # add l2 regularization
    # l2_penalty = regularize_layer_params(network, l2)
    # regc = [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1]
    # layers = get_all_layers(network)
    # reg_weights = {key: value for (key, value) in zip(layers, regc)}
    # l2_penalty = regularize_layer_params_weighted(reg_weights, l2)

    loss = loss_RMSE  # (1 - l2_reg) * loss_RMSE + l2_reg * l2_penalty

    # get network params
    params = get_all_params(network)
    # subset_params = params
    # Subset the network params to extract only the ones that should be trained
    # print('length of params', len(params), '\n')
    subset_params = [params[0], params[1], params[10], params[11], params[12], params[13]]


    #
    print('RMSPROP \n')
    updates = rmsprop(loss, subset_params, learning_rate=1e-4)
    # create validation/test loss expression
    # the loss represents the loss for all the labels
    test_prediction = get_output(network, deterministic=True)

    ###################New#########################
    # Aggregate the element-wise error into a scalar value using a mask
    # Note that y should not contain NaN values; replace them with 0 or -1. The replacement value
    # does not matter, since masked entries do not contribute to the aggregated error or the update.
    # MASK should be a matrix of size(y), with 0s in place of NaN values and 1s everywhere else.


    # build tensor variable for mask
    valMASK = T.matrix('valMASK')
    # collect squared error
    test_loss = squared_error(test_prediction, y)
    # Drop nan values and average over the remaining values
    test_loss = aggregate(test_loss, weights=valMASK, mode='normalized_sum')
    # compute the square root
    test_loss = test_loss.sqrt()
    ################################################
    # index for mini-batch slicing
    index = T.lscalar()

    # training function
    train_set_x_size = train_set_x.get_value().shape[0]
    val_set_x_size = valid_set_x.get_value().shape[0]

    train_fn = theano.function(inputs=[index],
                               outputs=[loss, loss_RMSE],
                               updates=updates,
                               givens={X: train_set_x[
                                          index * batch_size: T.minimum((index + 1) * batch_size, train_set_x_size)],
                                       y: train_set_y[
                                          index * batch_size: T.minimum((index + 1) * batch_size, train_set_x_size)],
                                       trainMASK: train_MASK[index * batch_size: T.minimum((index + 1) * batch_size,
                                                                                           train_set_x_size)]})
    # validation function
    val_fn = theano.function(inputs=[index],
                             outputs=[test_loss, prediction],
                             givens={X: valid_set_x[
                                        index * batch_size: T.minimum((index + 1) * batch_size, val_set_x_size)],
                                     y: valid_set_y[
                                        index * batch_size: T.minimum((index + 1) * batch_size, val_set_x_size)],
                                     valMASK: val_MASK[
                                              index * batch_size: T.minimum((index + 1) * batch_size, val_set_x_size)]})
    return train_fn, val_fn
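A minimal usage sketch (hypothetical epoch loop) for the two functions returned above, assuming the shared datasets were built with the same batch_size that was passed to build_update_functions:

import numpy as np

n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
for epoch in range(20):
    train_rmse = [train_fn(i)[1] for i in range(n_train_batches)]   # train_fn returns [loss, loss_RMSE]
    valid_rmse = [val_fn(i)[0] for i in range(n_valid_batches)]     # val_fn returns [test_loss, prediction]
    print("epoch %d: train RMSE %.4f, valid RMSE %.4f" %
          (epoch, np.mean(train_rmse), np.mean(valid_rmse)))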
Beispiel #54
0
    def __init__(self,
                 hidden_size=100,
                 nclasses=73,
                 num_embeddings=11359,
                 embedding_dim=100,
                 window_size=1,
                 memory_size=40,
                 n_memory_slots=8,
                 go_code=1,
                 depth=2,
                 load_dir=None):

        articles, titles = T.imatrices('articles', 'titles')
        n_article_slots = int(n_memory_slots /
                              2)  # TODO derive this from an arg
        n_title_slots = n_memory_slots - n_article_slots
        n_instances = articles.shape[0]

        self.window_size = window_size

        randoms = {
            # attr: shape
            # 'emb': (num_embeddings + 1, embedding_dim),
            'M_a': (memory_size, n_article_slots),
            'M_t': (memory_size, n_title_slots),
            'w_a': (n_article_slots, ),
            'w_t': (n_title_slots, ),
            'Wg_a': (window_size * embedding_dim, n_article_slots),
            'Wg_t': (window_size * embedding_dim, n_title_slots),
            'Wk': (hidden_size, memory_size),
            'Wb': (hidden_size, 1),
            'Wv': (hidden_size, memory_size),
            'We_a': (hidden_size, n_article_slots),
            'We_t': (hidden_size, n_title_slots),
            'Wx': (window_size * embedding_dim, hidden_size),
            'Wh': (memory_size, hidden_size),
            'W': (hidden_size, nclasses),
            'h0': hidden_size
        }

        zeros = {
            # attr: shape
            'bg_a': n_article_slots,
            'bg_t': n_title_slots,
            'bk': memory_size,
            'bb': 1,
            'bv': memory_size,
            'be_a': n_article_slots,
            'be_t': n_title_slots,
            'bh': hidden_size,
            'b': nclasses,
        }

        for l in range(depth):
            randoms['gru' + str(l)] = (1, embedding_dim)

        def random_shared(name):
            shape = randoms[name]
            return theano.shared(
                0.2 *
                np.random.normal(size=shape).astype(theano.config.floatX),
                name=name)

        def zeros_shared(name):
            shape = zeros[name]
            return theano.shared(np.zeros(shape, dtype=theano.config.floatX),
                                 name=name)

        for key in randoms:
            # create an attribute with associated shape and random values
            setattr(self, key, random_shared(key))

        for key in zeros:
            # create an attribute with associated shape and values equal to 0
            setattr(self, key, zeros_shared(key))

        self.names = randoms.keys() + zeros.keys()
        # self.names.remove('emb')  # no need to save or update embeddings
        scan_vars = 'h0 w_a M_a w_t M_t'.split()

        def repeat_for_each_instance(param):
            """ repeat param along new axis once for each instance """
            return T.repeat(T.shape_padleft(param),
                            repeats=n_instances,
                            axis=0)

        for key in scan_vars:
            setattr(self, key,
                    repeat_for_each_instance(self.__getattribute__(key)))
            self.names.remove(key)

        if load_dir is not None:
            with open(os.path.join(load_dir, 'params.pkl')) as handle:
                params = pickle.load(handle)
                self.__dict__.update(params)

        def recurrence(i, h_tm1, w_a, M_a, *args, **kwargs):
            """
            Notes:
            Headers from the paper are in all caps.
            mem = n_article_slots if is_article else n_title_slots

            :param i: center index of sliding window
            :param h_tm1: h_{t-1} (hidden state)
            :param w_a: attention weights for article memory
            :param M_a: article memory
            :param args: gru_weights, maybe w_t, maybe M_t
                   gru_weights: weights with which to initialize GRULayer on each time step
                   w_t: attention weights for titles memory
                   M_t: titles memory
            :param kwargs: is_training, is_article
                   is_training: if True, the index simply advances (i + 1, teacher forcing);
                                otherwise the model's own prediction is fed back as the next index
                   is_article: we use different parts of memory when working with an article
            :return: [y = model outputs,
                      y_max = predicted token indices,
                      next_idxs = next index,
                      h, w, M (see above)]
            """
            is_training = kwargs['is_training']
            is_article = kwargs['is_article']
            gru_weights = args[:depth]
            if len(args) > depth:
                w_t = args[depth]
                M_t = args[depth + 1]

            i_type = T.iscalar if is_article or is_training else T.ivector
            assert i.type == i_type

            if not is_article:
                assert w_t is not None and M_t is not None

            word_idxs = i
            if is_article or is_training:
                # get representation of word window
                document = articles if is_article else titles  # [instances, bucket_width]
                word_idxs = document[:, i:i + 1]  # [instances, 1]
            # x_i = self.emb[word_idxs].flatten(ndim=2)  # [instances, embedding_dim]

            input = InputLayer(shape=(None, 1), input_var=word_idxs)
            embed = EmbeddingLayer(input, num_embeddings, embedding_dim)
            gru = GRULayer(incoming=embed,
                           num_units=embedding_dim,
                           hid_init=self.gru0)
            for weight in gru_weights:
                gru = GRULayer(incoming=gru,
                               num_units=embedding_dim,
                               hid_init=weight)
            x_i = get_output(gru).flatten(ndim=2)
            x_i = Print('x_i')(x_i)  # [instances, embedding_dim]

            gru_weights = []

            if is_article:
                M_read = M_a  # [instances, memory_size, n_article_slots]
                w_read = w_a  # [instances, n_article_slots]
            else:
                M_read = T.concatenate(
                    [M_a, M_t],
                    axis=2)  # [instances, memory_size, n_title_slots]
                w_read = T.concatenate([w_a, w_t],
                                       axis=1)  # [instances, n_title_slots]

            # eqn 15
            c = T.batched_dot(M_read, w_read)  # [instances, memory_size]

            # EXTERNAL MEMORY READ
            def get_attention(Wg, bg, M, w):
                g = T.nnet.sigmoid(T.dot(x_i, Wg) + bg)  # [instances, mem]

                # eqn 11
                k = T.dot(h_tm1, self.Wk) + self.bk  # [instances, memory_size]

                # eqn 13
                beta = T.dot(h_tm1, self.Wb) + self.bb
                beta = T.nnet.softplus(beta)
                beta = T.addbroadcast(beta, 1)  # [instances, 1]

                # eqn 12
                w_hat = T.nnet.softmax(beta * cosine_dist(M, k))

                # eqn 14
                return (1 - g) * w + g * w_hat  # [instances, mem]

            w_a = get_attention(self.Wg_a, self.bg_a, M_a,
                                w_a)  # [instances, n_article_slots]
            if not is_article:
                w_t = get_attention(self.Wg_t, self.bg_t, M_t,
                                    w_t)  # [instances, n_title_slots]

            # MODEL INPUT AND OUTPUT
            # eqn 9
            h = T.dot(c, self.Wh) + T.dot(
                x_i, self.Wx) + self.bh  # [instances, hidden_size]

            # eqn 10
            y = T.nnet.softmax(T.dot(h, self.W) +
                               self.b)  # [instances, nclasses]

            # EXTERNAL MEMORY UPDATE
            def update_memory(We, be, w_update, M_update):
                # eqn 17
                e = T.nnet.sigmoid(T.dot(h_tm1, We) + be)  # [instances, mem]
                f = 1. - w_update * e  # [instances, mem]

                # eqn 16
                v = T.tanh(T.dot(h, self.Wv) +
                           self.bv)  # [instances, memory_size]

                # need to add broadcast layers for memory update
                f = f.dimshuffle(0, 'x', 1)  # [instances, 1, mem]
                u = w_update.dimshuffle(0, 'x', 1)  # [instances, 1, mem]
                v = v.dimshuffle(0, 1, 'x')  # [instances, memory_size, 1]

                # eqn 19
                return M_update * f + T.batched_dot(v, u) * (
                    1 - f)  # [instances, memory_size, mem]

            M_a = update_memory(self.We_a, self.be_a, w_a, M_a)
            attention_and_memory = [w_a, M_a]
            if not is_article:
                M_t = update_memory(self.We_t, self.be_t, w_t, M_t)
                attention_and_memory += [w_t, M_t]

            y_max = y.argmax(axis=1).astype(int32)
            next_idxs = i + 1 if is_training or is_article else y_max
            return [y, y_max, next_idxs, h] + attention_and_memory

        read_article = partial(recurrence, is_training=True, is_article=True)
        # for read_article, it actually doesn't matter whether is_training is true

        i0 = T.constant(0, dtype=int32, name='first_value_of_i')
        gru_weights = [getattr(self, 'gru' + str(l)) for l in range(depth)]
        outputs_info = [None, None, i0, self.h0, self.w_a, self.M_a
                        ] + gru_weights

        [_, _, _, h, w, M], _ = theano.scan(fn=read_article,
                                            outputs_info=outputs_info,
                                            n_steps=articles.shape[1],
                                            name='read_scan')

        produce_title = partial(recurrence, is_training=True, is_article=False)
        outputs_info[3:6] = [param[-1, :, :] for param in (h, w, M)]
        outputs_info.extend([self.w_t, self.M_t])
        bucket_width = titles.shape[
            1] - 1  # subtract 1 because <go> is omitted in y_true
        [y, y_max, _, _, _, _, _,
         _], _ = theano.scan(fn=produce_title,
                             outputs_info=outputs_info,
                             n_steps=bucket_width,
                             name='train_scan')

        # loss and updates
        y_clip = T.clip(y, .01, .99)
        y_flatten = y_clip.dimshuffle(2, 1, 0).flatten(ndim=2).T
        y_true = titles[:, 1:].ravel()  # [:, 1:] in order to omit <go>
        counts = T.extra_ops.bincount(y_true, assert_nonneg=True)
        weights = 1.0 / (counts[y_true] + 1) * T.neq(y_true, 0)
        losses = T.nnet.categorical_crossentropy(y_flatten, y_true)
        loss = objectives.aggregate(losses, weights, mode='sum')
        updates = adadelta(loss, self.params())

        self.learn = theano.function(inputs=[articles, titles],
                                     outputs=[y_max.T, loss],
                                     updates=updates,
                                     allow_input_downcast=True,
                                     name='learn')

        produce_title_test = partial(recurrence,
                                     is_training=False,
                                     is_article=False)

        self.test = theano.function(inputs=[articles, titles],
                                    outputs=[y_max.T],
                                    on_unused_input='ignore')

        outputs_info[2] = T.zeros([n_instances], dtype=int32) + go_code
        [_, y_max, _, _, _, _, _,
         _], _ = theano.scan(fn=produce_title_test,
                             outputs_info=outputs_info,
                             n_steps=bucket_width,
                             name='test_scan')

        self.predict = theano.function(inputs=[articles, titles],
                                       outputs=y_max.T,
                                       name='infer')
Beispiel #55
0
def loss(x, t):
    return aggregate(binary_crossentropy(x, t))
"""loading weight values from the previous model"""
"""load layer parameters. To be used only when learning walk forward"""
# with np.load('model.npz') as f:
#     param_values = [f['arr_%d'%i] for i in range(len(f.files))]
# lasagne.layers.set_all_param_values(network,param_values)

prediction = lasagne.layers.get_output(network,input_var, deterministic = True)
loss = lasagne.objectives.binary_crossentropy(prediction, target_var)

"""loss function aggregation only to be used when using cost sensitive training"""
# class_weights = np.empty((50,2),dtype=float)
# class_weights_global = class_weights
# loss = aggregate(loss, weights=theano.shared(class_weights_global),mode='normalized_sum')

"""loss function aggregation to be used without cost sensitive training"""
loss = aggregate(loss,mode='mean')


params = lasagne.layers.get_all_params(network, trainable=True)
updates = lasagne.updates.sgd(loss, params, learning_rate=0.2)
test_prediction = lasagne.layers.get_output(network,input_var, deterministic=True)
test_loss = lasagne.objectives.binary_crossentropy(test_prediction,target_var)
test_loss = test_loss.mean()
# test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),dtype=theano.config.floatX)
# test_acc = test_acc.mean()
train_fn = theano.function([input_var, target_var], loss, updates=updates)
# val_fn = theano.function([input_var, target_var], [test_loss, test_acc])
val_fn = theano.function([input_var, target_var], test_loss)

"""Training"""
print("Starting training...")
def build_update_functions(train_set_x, train_set_y,
                           valid_set_x, valid_set_y,
                           network,
                           y, X,
                           train_MASK, val_MASK,
                           batch_size=32,
                           l2_reg=.0001,
                           learning_rate=.005,
                           momentum=.9):
    # build update functions
    # extract tensor representing the network predictions
    prediction = get_output(network)
    ################################################
    ##################old###########################
    # # collect squared error
    # loss_RMSE = squared_error(prediction, y)
    # # compute the root mean squared error
    # loss_RMSE = loss_RMSE.mean().sqrt()
    ###################New#########################
    # Aggregate the element-wise error into a scalar value using a mask
    # note that y should not contain NaN; replace NaNs with 0 or -1. The value does not matter,
    # since masked entries are not used to compute the aggregated error or the network update.
    # MASK should be a matrix of size(y), with 0s in place of NaN values and 1s everywhere else.

    # build tensor variable for mask
    trainMASK = T.matrix('trainMASK')
    # collect squared error
    loss_RMSE = squared_error(prediction, y)
    # Drop nan values and average over the remaining values
    loss_RMSE = aggregate(loss_RMSE, weights=trainMASK, mode='normalized_sum')
    # compute the square root
    loss_RMSE = loss_RMSE.sqrt()
    ###############################################

    # add l2 regularization
    l2_penalty = regularize_network_params(network, l2)
    loss = (1 - l2_reg) * loss_RMSE + l2_reg * l2_penalty
    # get network params
    params = get_all_params(network, trainable=True)

    #     # create update criterion
    # print('nesterov')
    # updates = nesterov_momentum( loss, params, learning_rate=.01, momentum=.9)

    # print('AdaGrad')
    # updates = adagrad(loss, params,learning_rate= 1e-2)
    #
    print('RMSPROP \n')
    updates = rmsprop(loss, params, learning_rate=learning_rate)
    # create validation/test loss expression
    # the loss represents the loss for all the labels
    test_prediction = get_output(network, deterministic=True)
    ################################################
    ##################old###########################
    #     # collect squared error
    #     test_loss = squared_error(test_prediction,y)
    #     # compute the root mean squared error
    #     test_loss = test_loss.mean().sqrt()
    # #     test_loss_withl2 = (1-l2_reg) * test_loss + l2_reg * l2_penalty
    ################################################
    ###################New#########################
    # Aggregate the element-wise error into a scalar value using a mask
    # note that y should not contain NaN; replace NaNs with 0 or -1. The value does not matter,
    # since masked entries are not used to compute the aggregated error or the network update.
    # MASK should be a matrix of size(y), with 0s in place of NaN values and 1s everywhere else.


    # build tensor variable for mask
    valMASK = T.matrix('valMASK')
    # collect squared error
    test_loss = squared_error(test_prediction, y)
    # Drop nan values and average over the remaining values
    test_loss = aggregate(test_loss, weights=valMASK, mode='normalized_sum')
    # compute the square root
    test_loss = test_loss.sqrt()
    ################################################
    # index for mini-batch slicing
    index = T.lscalar()

    # training function
    train_set_x_size = train_set_x.get_value().shape[0]
    val_set_x_size = valid_set_x.get_value().shape[0]

    train_fn = theano.function(inputs=[index],
                               outputs=[loss, loss_RMSE],
                               updates=updates,
                               givens={X: train_set_x[
                                          index * batch_size: T.minimum((index + 1) * batch_size, train_set_x_size)],
                                       y: train_set_y[
                                          index * batch_size: T.minimum((index + 1) * batch_size, train_set_x_size)],
                                       trainMASK: train_MASK[index * batch_size: T.minimum((index + 1) * batch_size,
                                                                                           train_set_x_size)]})
    # validation function
    val_fn = theano.function(inputs=[index],
                             outputs=[test_loss, test_prediction],
                             givens={X: valid_set_x[
                                        index * batch_size: T.minimum((index + 1) * batch_size, val_set_x_size)],
                                     y: valid_set_y[
                                        index * batch_size: T.minimum((index + 1) * batch_size, val_set_x_size)],
                                     valMASK: val_MASK[
                                              index * batch_size: T.minimum((index + 1) * batch_size, val_set_x_size)]})
    return train_fn, val_fn
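
A minimal sketch of what the masked 'normalized_sum' aggregation above computes: with a 0/1 mask as weights, aggregate returns the sum of the masked errors divided by the sum of the mask, so positions whose targets were NaN (mask 0) contribute nothing to the average. The numbers below are illustrative only and not part of the original example.

import numpy as np

# toy element-wise squared errors; the second target was NaN and is masked out
errors = np.array([0.25, 9.0, 1.0])
mask = np.array([1.0, 0.0, 1.0])

# aggregate(errors, weights=mask, mode='normalized_sum') reduces to:
masked_mean = (errors * mask).sum() / mask.sum()
print(masked_mean)  # 0.625 -- the 9.0 at the masked position is ignored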
Example #58
0
def loss(x, t):
    return LO.aggregate(
        LO.categorical_crossentropy(T.clip(x, 1e-6, 1. - 1e-6), t))
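# Clipping predictions to [1e-6, 1 - 1e-6] keeps the log inside
# categorical_crossentropy finite when the network outputs exact 0s or 1s.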
    def build_model(self):
        # Define the model prior to building it
        if not hasattr(self, 'network'):
            self.model()
            self._generate_layer_list()

        # Print the model architecture
        self.log_msg("Network Architecture")
        self.log_msg(self.model_str())

        # Training loss
        train_prediction = get_output(self.network)
        train_loss = aggregate(self.objective(train_prediction,
                                              self.output_var),
                               mode='mean')
        self.log_msg("Objective: {}".format(self.objective.__name__))

        # Validation loss
        validation_prediction = get_output(self.network, deterministic=True)
        validation_loss = aggregate(self.objective(validation_prediction,
                                                   self.output_var),
                                    mode='mean')

        # Update the parameters
        params = get_all_params(self.network, trainable=True)
        popts = {
            'loss_or_grads': train_loss,
            'params': params,
            'learning_rate': self.learning_rate_tensor,
            'momentum': self.momentum_tensor,
        }
        # Inspect to see if momentum is a valid argument for the update
        update_args = inspect.getargspec(self.updates)[0]

        # Remove momentum if not applicable
        if 'momentum' not in update_args:
            del popts['momentum']

        updates = self.updates(**popts)

        # Print the learning rate type
        self.log_msg('Update: %s' % self.updates.__name__)
        self.log_msg("Learning Rate: %s" % self.learning_rate.__name__)

        if 'momentum' in popts:
            self.log_msg("Momentum: %s" % self.momentum.__name__)

        # Define training loss function
        self.train_loss = theano.function(
            inputs=[self.input_var, self.output_var],
            outputs=train_loss,
            updates=updates,
            allow_input_downcast=True,
        )

        # Define the accuracy function for categorisation problems
        cat_accuracy = T.mean(
            T.eq(
                T.argmax(validation_prediction, axis=1),
                T.argmax(self.output_var, axis=1),
            ))
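        # cat_accuracy compares the argmax of the predictions with the argmax of
        # output_var, so it assumes one-hot (or probability-encoded) targets.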

        # Define validation loss function
        if self.objective is squared_error:
            self.valid_loss = theano.function(
                inputs=[self.input_var, self.output_var],
                outputs=validation_loss,
            )
        else:
            self.valid_loss = theano.function(
                inputs=[self.input_var, self.output_var],
                outputs=[validation_loss, cat_accuracy],
            )

        # Define predict
        self.predict = theano.function(inputs=[self.input_var],
                                       outputs=validation_prediction)

        self.train_predict = theano.function(inputs=[self.input_var],
                                             outputs=train_prediction)