Example #1
def sgdWithLrsClip(loss_or_grads,
                   params,
                   learning_rate=.01,
                   mu_lr=.01,
                   si_lr=.001,
                   focused_w_lr=.01,
                   momentum=.9,
                   verbose=False):
    '''
    Same as sgdWithLrs but applies clips after the updates.
    '''
    from collections import OrderedDict
    from lasagne.updates import get_or_compute_grads, apply_momentum
    import numpy as np
    grads = get_or_compute_grads(loss_or_grads, params)
    updates = OrderedDict()
    #momentum_params_list =[]
    f32 = np.float32
    if verbose:
        print("Params List", params)
    for param, grad in zip(params, grads):
        if verbose:
            print("param name", param.name, "shape:", param.eval().shape)
        #print("param name", param.name, "shape:", param.get_value().shape)

        #grad = clip_tensor(grad, -0.001, 0.001)
        if param.name.find('focus') >= 0 and param.name.find('mu') >= 0:
            updates[param] = param - mu_lr * grad
            updates = apply_momentum(updates,
                                     params=[param],
                                     momentum=momentum)
            updates[param] = clip_tensor(updates[param], f32(0.01), f32(0.99))

        elif param.name.find('focus') >= 0 and param.name.find('si') >= 0:
            updates[param] = param - si_lr * grad
            updates = apply_momentum(updates,
                                     params=[param],
                                     momentum=momentum)
            updates[param] = clip_tensor(updates[param], f32(0.01), f32(0.5))

        elif param.name.find('focus') >= 0 and param.name.find('W') >= 0:
            updates[param] = param - (focused_w_lr * grad)
            updates = apply_momentum(updates,
                                     params=[param],
                                     momentum=momentum)
            #updates[param] =clip_tensor(updates[param], -0.5, 0.5)
        else:
            updates[param] = param - learning_rate * grad
            updates = apply_momentum(updates,
                                     params=[param],
                                     momentum=momentum)
            #if param.name.find('W')>=0:
            #print (param, grad, learning_rate)
    return updates
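clip_tensor is used throughout these focus-parameter examples but its definition is not shown; a minimal sketch, assuming it is just a thin wrapper around theano.tensor.clip, could look like this:

# Hedged sketch (assumption): clip_tensor bounds a symbolic tensor elementwise.
import theano.tensor as T

def clip_tensor(t, min_value, max_value):
    return T.clip(t, min_value, max_value)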
Example #2
def get_network(model):

    input_data = tensor.dmatrix('x')
    targets_var = tensor.dmatrix('y')

    network = layers.InputLayer((model['batch_size'], model['input_vars']), input_data)

    nonlin = nonlinearities.rectify
    if model['hidden_nonlinearity'] != 'ReLu':
        nonlin = nonlinearities.tanh

    prev_layer = network

    for l in range(model['nlayers']):
        fc = layers.DenseLayer(prev_layer, model['units'], nonlinearity=nonlin)
        if model['dropout']:
            fc = layers.DropoutLayer(fc, 0.5)
        prev_layer = fc

    output_lin = None
    if model['output_mode'] == OUTPUT_LOG:
        output_lin = nonlinearities.tanh
    output_layer = layers.DenseLayer(prev_layer, 1, nonlinearity=output_lin)

    predictions = layers.get_output(output_layer)

    if model['output_mode'] == OUTPUT_BOUNDED:
        (minth, maxth) = model['maxmin'][model['control']]
        maxt = theano.shared(np.ones((model['batch_size'], 1)) * maxth)
        mint = theano.shared(np.ones((model['batch_size'], 1)) * minth)
        predictions = tensor.min(tensor.concatenate([maxt, predictions], axis=1), axis=1)
        predictions = tensor.reshape(predictions, (model['batch_size'], 1))
        predictions = tensor.max(tensor.concatenate([mint, predictions], axis=1), axis=1)
        predictions = tensor.reshape(predictions, (model['batch_size'], 1))

    loss = objectives.squared_error(predictions, targets_var)
    loss = objectives.aggregate(loss, mode='mean')

    params = layers.get_all_params(output_layer)

    test_prediction = layers.get_output(output_layer, deterministic=True)
    test_loss = objectives.squared_error(test_prediction,  targets_var)
    test_loss = test_loss.mean()

    updates_sgd = updates.sgd(loss, params, learning_rate=model['lr'])
    ups = updates.apply_momentum(updates_sgd, params, momentum=0.9)

    train_fn = theano.function([input_data, targets_var], loss, updates=ups)
    pred_fn = theano.function([input_data], predictions)
    val_fn = theano.function([input_data, targets_var], test_loss)

    return {'train': train_fn, 'eval': val_fn, 'pred': pred_fn, 'layers': output_layer}
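The min/max-over-concatenation construction above bounds each prediction to [minth, maxth]. As a hedged aside (not part of the example's code), the same elementwise clamp can be written with theano.tensor.clip:

# Toy demonstration of elementwise clamping with T.clip (assumed bounds -1..2).
import numpy as np
import theano
import theano.tensor as tensor

preds = tensor.dmatrix('preds')
bounded = tensor.clip(preds, -1.0, 2.0)
clip_fn = theano.function([preds], bounded)
print(clip_fn(np.array([[-3.0], [0.5], [5.0]])))  # -> [[-1.], [0.5], [2.]]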
Example #3
    def get_cost_updates(self, corruption_level, learning_rate, noise = 0.0, momentum=0):
        """ This function computes the cost and the updates for one trainng
        step of the dA """

        tilde_x = self.get_corrupted_input(self.x, corruption_level, noise)
        y = self.get_hidden_values(tilde_x)
        z = self.get_reconstructed_input(y)
        L = - T.sum(self.desired * T.log(z) + (1 - self.desired) * T.log(1 - z), axis=1)
        cost = T.mean(L)

        # adagrad with momentum on cost
        updates_ada = adagrad(cost, self.params, learning_rate=learning_rate)
        updates = apply_momentum(updates_ada, self.params, momentum=momentum)

        return (cost, updates)
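The adagrad-plus-momentum composition above follows the general Lasagne pattern of building a base update dictionary and then passing it through apply_momentum. A minimal self-contained sketch with a toy quadratic loss (the shared variable w is hypothetical, standing in for the autoencoder's parameters):

import numpy as np
import theano
import theano.tensor as T
from lasagne.updates import adagrad, apply_momentum

# toy parameter and loss: minimize ||w||^2
w = theano.shared(np.ones(3, dtype=theano.config.floatX), name='w')
loss = T.sum(w ** 2)

updates_ada = adagrad(loss, [w], learning_rate=0.1)       # base adagrad updates
updates = apply_momentum(updates_ada, [w], momentum=0.9)  # momentum on top

train_step = theano.function([], loss, updates=updates)
for _ in range(10):
    train_step()  # w is driven toward zero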
Example #4
def build_model(n_input,
                n_hidden,
                optimizer=adagrad,
                l2_weight=1e-4,
                l1_weight=1e-2):
    '''
    build NN model for estimating the model function
    '''
    global LR

    input_A = L.InputLayer((None, n_input), name='A')
    layer_A = L.DenseLayer(input_A, n_hidden, b=None, nonlinearity=identity)

    input_B = L.InputLayer((None, n_input), name='B')
    layer_B = L.DenseLayer(input_B, n_hidden, b=None, nonlinearity=identity)

    merge_layer = L.ElemwiseSumLayer((layer_A, layer_B))

    output_layer = L.DenseLayer(merge_layer, 1, b=None,
                                nonlinearity=identity)  # output is scalar

    x1 = T.matrix('x1')
    x2 = T.matrix('x2')
    y = T.matrix('y')

    out = L.get_output(output_layer, {input_A: x1, input_B: x2})
    params = L.get_all_params(output_layer)
    loss = T.mean(squared_error(out, y))

    # add l1 penalty
    l1_penalty = regularize_layer_params([layer_A, layer_B, output_layer], l1)

    # add l2 penalty
    l2_penalty = regularize_layer_params([layer_A, layer_B, output_layer], l2)

    # get loss + penalties
    loss = loss + l1_penalty * l1_weight + l2_penalty * l2_weight

    updates_sgd = optimizer(loss, params, learning_rate=LR)
    updates = apply_momentum(updates_sgd, params, momentum=0.9)
    # updates = optimizer(loss,params,learning_rate=LR)

    f_train = theano.function([x1, x2, y], loss, updates=updates)
    f_test = theano.function([x1, x2, y], loss)
    f_out = theano.function([x1, x2], out)

    return f_train, f_test, f_out, output_layer
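A hedged usage sketch for build_model above; it assumes LR is defined in the same module as build_model (the function reads it as a global) and that the inputs are float matrices of shape (batch, n_input):

import numpy as np
import theano

LR = 0.01  # assumed module-level learning rate read inside build_model
f_train, f_test, f_out, net = build_model(n_input=5, n_hidden=8)

x1 = np.random.rand(32, 5).astype(theano.config.floatX)
x2 = np.random.rand(32, 5).astype(theano.config.floatX)
y = np.random.rand(32, 1).astype(theano.config.floatX)
print(f_train(x1, x2, y))  # training loss for one batch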
Example #5
    def get_cost_updates(self, corrupted_input, learning_rate):
        """ This function computes the cost and the updates for one training
        step of the dA """

        tilde_x = corrupted_input
        y = self.get_hidden_values(tilde_x)
        z = self.get_reconstructed_input(y)
        # note : we sum over the size of a datapoint; if we are using
        #        minibatches, L will be a vector, with one entry per
        #        example in the minibatch
        # L = - T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z))
        L = categorical_crossentropy(z, self.x)

        # L is a vector where each element is the cross-entropy cost of the
        # reconstruction of the corresponding example of the minibatch; the
        # cost of the minibatch is their mean
        cost = T.mean(L)

        # small L2 penalty on the weights
        reg = 1e-8 * lasagne.regularization.l2(self.params[0])
        cost = cost + reg

        # SGD with momentum on the regularized cost; lasagne computes the
        # gradients internally, so no explicit T.grad call is needed
        updates_sgd = sgd(cost, self.params, learning_rate)
        updates_dic = apply_momentum(updates_sgd, self.params, momentum=0.9)
        updates = list(updates_dic.items())

        return (cost, updates)
Example #6
def sgdWithLrs(loss_or_grads,
               params,
               learning_rate=.01,
               mu_lr=.01,
               si_lr=.001,
               focused_w_lr=.01,
               momentum=.9):
    '''
    This function provides SGD with different learning rates for the focus
    parameters mu, si, and w.
    '''
    from collections import OrderedDict
    from lasagne.updates import get_or_compute_grads, apply_momentum
    grads = get_or_compute_grads(loss_or_grads, params)
    updates = OrderedDict()
    momentum_params_list = []
    print(params)
    for param, grad in zip(params, grads):
        # import pdb; pdb.set_trace()
        #grad = clip_tensor(grad, -0.01, 0.01)
        if param.name.find('focus') >= 0 and param.name.find('mu') >= 0:
            updates[param] = param - mu_lr * grad
            momentum_params_list.append(param)

        elif param.name.find('focus') >= 0 and param.name.find('si') >= 0:
            updates[param] = param - si_lr * grad
            #momentum_params_list.append(param)

        elif param.name.find('focus') >= 0:
            updates[param] = param - (focused_w_lr * grad)
            momentum_params_list.append(param)

        else:
            updates[param] = param - learning_rate * grad
            momentum_params_list.append(param)
            #print (param, grad, learning_rate)
    return apply_momentum(updates,
                          params=momentum_params_list,
                          momentum=momentum)
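Parameters are dispatched purely by their names, so shared variables whose names contain 'focus' together with 'mu' or 'si' receive their own learning rates. A hedged sketch with hypothetical toy parameters (the names are illustrative only):

import numpy as np
import theano
import theano.tensor as T

# hypothetical named parameters: a focus centre and an ordinary weight matrix
mu = theano.shared(np.asarray(0.5, dtype=theano.config.floatX), name='focus-1.mu')
W = theano.shared(np.ones((3, 3), dtype=theano.config.floatX), name='dense-1.W')
loss = T.sum(W ** 2) + (mu - np.asarray(0.3, dtype=theano.config.floatX)) ** 2

# mu is updated with mu_lr, W falls through to the default learning_rate
updates = sgdWithLrs(loss, [mu, W], learning_rate=0.01, mu_lr=0.05)
train_step = theano.function([], loss, updates=updates)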
Example #7
def get_network(model):

    input_data = tensor.dmatrix('x')
    targets_var = tensor.dmatrix('y')

    network = layers.InputLayer((model['batch_size'], model['input_vars']),
                                input_data)

    nonlin = nonlinearities.rectify
    if model['hidden_nonlinearity'] != 'ReLu':
        nonlin = nonlinearities.tanh

    prev_layer = network

    for l in range(model['nlayers']):
        W = None
        if model['hidden_nonlinearity'] == 'ReLu':
            W = lasagne.init.GlorotUniform('relu')
        else:
            W = lasagne.init.GlorotUniform(1)

        fc = layers.DenseLayer(prev_layer,
                               model['units'],
                               nonlinearity=nonlin,
                               W=W)
        if model['dropout']:
            fc = layers.DropoutLayer(fc, 0.5)
        prev_layer = fc

    output_lin = None
    if model['output_mode'] == OUTPUT_LOG:
        output_lin = nonlinearities.tanh
    output_layer = layers.DenseLayer(prev_layer, 1, nonlinearity=output_lin)

    predictions = layers.get_output(output_layer)

    if model['output_mode'] != OUTPUT_LOG:
        (minth, maxth) = model['maxmin'][model['control']]
        maxt = theano.shared(np.ones((model['batch_size'], 1)) * maxth)
        mint = theano.shared(np.ones((model['batch_size'], 1)) * minth)
        predictions = tensor.min(tensor.concatenate([maxt, predictions],
                                                    axis=1),
                                 axis=1)
        predictions = tensor.reshape(predictions, (model['batch_size'], 1))
        predictions = tensor.max(tensor.concatenate([mint, predictions],
                                                    axis=1),
                                 axis=1)
        predictions = tensor.reshape(predictions, (model['batch_size'], 1))

    if model['output_mode'] == OUTPUT_NO:
        prediction_unboun = layers.get_output(output_layer)
        loss = objectives.squared_error(prediction_unboun, targets_var)
    else:
        loss = objectives.squared_error(predictions, targets_var)

    loss = objectives.aggregate(loss, mode='mean')

    params = layers.get_all_params(output_layer)
    #    test_prediction = layers.get_output(output_layer, deterministic=True)  #fix for dropout
    test_loss = objectives.squared_error(predictions, targets_var)
    test_loss = test_loss.mean()

    if model['hidden_nonlinearity'] == 'ReLu':
        model['lr'] *= 0.5
    updates_sgd = updates.sgd(loss, params, learning_rate=model['lr'])
    ups = updates.apply_momentum(updates_sgd, params, momentum=0.9)

    train_fn = theano.function([input_data, targets_var], loss, updates=ups)
    pred_fn = theano.function([input_data], predictions)
    #    pred_fn = theano.function([input_data], prediction_unboun)
    val_fn = theano.function([input_data, targets_var], test_loss)

    return {
        'train': train_fn,
        'eval': val_fn,
        'pred': pred_fn,
        'layers': output_layer
    }
Example #8
def rmsprop_momentum(loss, params, eta=1e-3, alpha=0.9, **kwargs):
    rms = updt.rmsprop(loss, params, learning_rate=eta, **kwargs)
    return updt.apply_momentum(rms, params, momentum=alpha)
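A hedged usage sketch for rmsprop_momentum above, assuming updt aliases lasagne.updates and using a hypothetical toy parameter in place of a real network's parameter list:

import numpy as np
import theano
import theano.tensor as T
import lasagne.updates as updt

# toy parameter and loss; in practice params would come from get_all_params
w = theano.shared(np.ones(4, dtype=theano.config.floatX), name='w')
loss = T.sum(w ** 2)

updates = rmsprop_momentum(loss, [w], eta=1e-3, alpha=0.9)
step = theano.function([], loss, updates=updates)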
Example #9
all_params = get_all_params(layers, trainable=True)
# compute loss
generation = lasagne.layers.get_output(net)
generation = generation.dimshuffle([0, 2, 3, 1])
# mean squared error
train_loss = lasagne.objectives.squared_error(
    generation.reshape((generation.shape[0], -1)),
    img_batch_target.reshape((img_batch_target.shape[0], -1)))
train_loss = train_loss.sum(axis=1)
train_loss = train_loss.mean()
# update
lrn_rate = T.cast(theano.shared(options['learning_rate']),
                  'floatX')  # we can use dynamic learning rate
optimizer = sgd
updates_sgd = optimizer(train_loss, all_params, learning_rate=lrn_rate)
updates = apply_momentum(updates_sgd, all_params, momentum=0.95)
# train
_train = theano.function([img_batch, pose_code, img_batch_target],
                         train_loss,
                         updates=updates,
                         allow_input_downcast=True)

# ------------ training ----------------
print("Train...")
if options['start_epoch'] == 0:
    start_epoch = 0
else:
    model.load_model(options['init_model_from'])
    start_epoch = options['start_epoch']

nb_epoch = options['max_epochs']
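As the comment above notes, the learning rate is wrapped in a shared variable so it can be changed during training. A hedged, self-contained sketch of that pattern (the decay schedule and toy parameter are assumptions, not the example's code):

import numpy as np
import theano
import theano.tensor as T
from lasagne.updates import sgd, apply_momentum

w = theano.shared(np.ones(2, dtype=theano.config.floatX), name='w')
train_loss = T.sum(w ** 2)
lr_shared = theano.shared(np.asarray(0.1, dtype=theano.config.floatX))

updates_sgd = sgd(train_loss, [w], learning_rate=lr_shared)
updates = apply_momentum(updates_sgd, [w], momentum=0.95)
step = theano.function([], train_loss, updates=updates)

for epoch in range(5):
    step()
    # decay the shared learning rate between epochs
    lr_shared.set_value(lr_shared.get_value() * np.asarray(0.9, dtype=theano.config.floatX))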
Example #10
def sgdWithWeightSupress(loss_or_grads,
                         params,
                         learning_rate=.01,
                         mu_lr=.01,
                         si_lr=.001,
                         focused_w_lr=.01,
                         momentum=.9,
                         verbose=False):
    ''' This update function masks the focus weights after they are updated.
    The idea is that weights outside of the focus function must be suppressed
    to prevent weight memory when the focus changes its position.

    To do this I get the mu and si values of the focus layer, calculate a Gauss
    window, scale it so the center is 1 but the outside is close to 0, and then
    multiply it with the weights.
    '''
    from collections import OrderedDict
    from lasagne.updates import get_or_compute_grads, apply_momentum
    import numpy as np
    grads = get_or_compute_grads(loss_or_grads, params)
    updates = OrderedDict()
    #momentum_params_list =[]
    if verbose:
        print(params)
    for param, grad in zip(params, grads):

        #grad = clip_tensor(grad, -0.001, 0.001)
        if param.name.find('focus') >= 0 and param.name.find('mu') >= 0:
            updates[param] = param - mu_lr * grad
            updates = apply_momentum(updates,
                                     params=[param],
                                     momentum=momentum)
            updates[param] = clip_tensor(updates[param], 0.01, 0.99)

        elif param.name.find('focus') >= 0 and param.name.find('si') >= 0:
            updates[param] = param - si_lr * grad
            updates = apply_momentum(updates,
                                     params=[param],
                                     momentum=momentum)
            updates[param] = clip_tensor(updates[param], 0.01, 0.5)

        elif param.name.find('focus') >= 0 and param.name.find('W') >= 0:
            param_layer_name = param.name.split(".")[0]
            mu_name = param_layer_name + '.mu'

            si_name = param_layer_name + ".si"
            mu_si_w = get_params_values_wkey(params,
                                             [mu_name, si_name, param.name])
            from focusing import U_numeric
            us = U_numeric(np.linspace(0, 1, mu_si_w[param.name].shape[0]),
                           mu_si_w[mu_name],
                           mu_si_w[si_name],
                           1,
                           normed=False)

            updates[param] = (param - (focused_w_lr * grad))

            updates = apply_momentum(updates,
                                     params=[param],
                                     momentum=momentum)
            # here we are masking the weights, so they can not stay out of envelope
            us[us > 0.1] = 1.0
            updates[param] = updates[param] * us.T
            #updates[param] = updates[param]*, -0.5, 0.5)
        else:
            updates[param] = param - learning_rate * grad
            updates = apply_momentum(updates,
                                     params=[param],
                                     momentum=momentum)
            #print (param, grad, learning_rate)
    return updates
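The masking step relies on U_numeric from the focusing module, which is not shown here. A hedged numpy sketch of the same idea (a Gaussian envelope over the weight rows, thresholded so the focused region is left intact) might look like this:

import numpy as np

def gauss_mask(n_inputs, mu, si, threshold=0.1):
    # Gaussian envelope over [0, 1], peak value 1 at mu (assumption: this
    # mirrors what U_numeric computes with normed=False)
    x = np.linspace(0, 1, n_inputs)
    us = np.exp(-0.5 * ((x - mu) / si) ** 2)
    # keep the focused region untouched, suppress weights far outside it
    us[us > threshold] = 1.0
    return us

mask = gauss_mask(100, mu=0.5, si=0.1)
# the updated weights would then be multiplied elementwise by the mask
# (the exact broadcasting depends on the shape U_numeric returns)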
Example #11
def sgdWithLrLayers(loss_or_grads,
                    params,
                    learning_rate=.01,
                    mu_lr=.01,
                    si_lr=.001,
                    focused_w_lr=.01,
                    momentum=.9):
    '''
    This function updates each layer's parameters with a different learning rate.
    Under development.
    '''
    from collections import OrderedDict
    from lasagne.updates import get_or_compute_grads, apply_momentum
    grads = get_or_compute_grads(loss_or_grads, params)
    updates = OrderedDict()
    #momentum_params_list =[]
    #print(params)
    for param, grad in zip(params, grads):
        # import pdb; pdb.set_trace()
        grad = clip_tensor(grad, -0.01, 0.01)
        if param.name.find('focus') >= 0 and param.name.find('mu') >= 0:
            updates[param] = param - mu_lr * grad
            updates = apply_momentum(updates,
                                     params=[param],
                                     momentum=momentum / 2)
            updates[param] = clip_tensor(updates[param], 0.05, 0.95)
            #momentum_params_list.append(param)
            #print (param,mu_lr)
            #print (param, grad, mu_lr)
        elif param.name.find('focus') >= 0 and param.name.find('si') >= 0:
            updates[param] = param - si_lr * grad
            #momentum_params_list.append(param)
            updates = apply_momentum(updates,
                                     params=[param],
                                     momentum=momentum)
            updates[param] = clip_tensor(updates[param], 0.01, 0.5)

            #print (param,si_lr)
            #print (param, grad, si_lr)
            #print (param, grad, scaler_lr)
        elif param.name.find('focus') >= 0 and (param.name.find('W') >= 0 or
                                                param.name.find('bias') >= 0):
            level = int(param.name.split('-')[1].split('.')[0])
            #print(param.name, level)
            updates[param] = param - (learning_rate * (1. /
                                                       (level + 1))) * grad
            updates = apply_momentum(updates,
                                     params=[param],
                                     momentum=momentum)
            if (param.name.find('W') >= 0):
                updates[param] = clip_tensor(updates[param], -0.4, 0.4)
            #momentum_params_list.append(param)
            #print (param,focused_w_lr)
        elif param.name.find('W') >= 0 or param.name.find('b') >= 0:
            if param.name.find('-') >= 0:
                level = int(param.name.split('-')[1].split('.')[0])
                updates[param] = param - (learning_rate * (1. / level)) * grad
            else:
                updates[param] = param - (learning_rate) * grad
            #momentum_params_list.append(param)
            # apply momentum once, regardless of which branch set the update
            updates = apply_momentum(updates,
                                     params=[param],
                                     momentum=momentum)
            if (param.name.find('W') >= 0):
                updates[param] = clip_tensor(updates[param], -0.4, 0.4)

            if (param.name.find('b') >= 0):
                updates[param] = clip_tensor(updates[param], -1.0, 1.0)
        else:
            updates[param] = param - (learning_rate) * grad
            #momentum_params_list.append(param)
            updates = apply_momentum(updates,
                                     params=[param],
                                     momentum=momentum)
            if (param.name.find('beta') >= 0):
                updates[param] = clip_tensor(updates[param], -1., 1.)
            #print (param, grad, learning_rate)

    return updates