def test_apply_penalty(self):
        from lasagne.regularization import apply_penalty, l2
        A = T.vector()
        B = T.matrix()

        assert apply_penalty([], l2) == 0

        assert equal_computations([apply_penalty(A, l2)], [l2(A)])

        assert equal_computations([apply_penalty([A, B], l2)],
                                  [sum([l2(A), l2(B)])])
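The test above exercises apply_penalty on an empty list, a single tensor, and a list of tensors. In practice it is usually combined with get_all_params to build a network-wide weight-decay term; a minimal sketch (the output-layer handle and the 1e-4 coefficient are illustrative, not from the original code):

from lasagne.layers import get_all_params
from lasagne.regularization import apply_penalty, l2

# sum of squared values over every regularizable parameter of the network,
# scaled by an illustrative weight-decay coefficient
weight_decay = 1e-4 * apply_penalty(get_all_params(l_out, regularizable=True), l2)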
def build_model_focusing(input_feas, classes, hidden_count, batchnorm=True):
    # Initializers and nonlinearities
    ini = lasagne.init.HeUniform()
    nonlin = lasagne.nonlinearities.rectify
    linear = lasagne.nonlinearities.linear
    softmax = lasagne.nonlinearities.softmax

    # Input Layer

    l_in = lasagne.layers.InputLayer(shape=(None, input_feas))

    l_focus1 = FocusedLayer1D(l_in,
                              num_units=hidden_count,
                              nonlinearity=linear,
                              name='focus1',
                              trainMus=UPDATE_MU,
                              trainSis=UPDATE_SI,
                              initMu=INIT_MU,
                              W=ini,
                              withWeights=WITH_WEIGHTS,
                              bias=lasagne.init.Constant(0.0),
                              initSigma=INIT_SI,
                              scaler=INIT_SCALER,
                              weight_gain=1.0,
                              trainScaler=UPDATE_SCAlER,
                              trainWs=True)

    if batchnorm:
        # if batch norm is turned off, the weights get large
        l_bn = lasagne.layers.NonlinearityLayer(
            lasagne.layers.BatchNormLayer(l_focus1), nonlinearity=nonlin)

    else:

        l_bn = lasagne.layers.NonlinearityLayer(l_focus1, nonlinearity=nonlin)

    #l_drop1 = lasagne.layers.dropout(l_bn, p=0.1)

    # Output
    l_out = lasagne.layers.DenseLayer(l_bn,
                                      num_units=classes,
                                      nonlinearity=softmax,
                                      W=ini,
                                      name='output')

    penalty = l2(l_out.W) * 1e-3
    if WITH_WEIGHTS:
        penalty += l2(l_focus1.W) * 1e-4 + (l1(l_focus1.W) *
                                            1e-6) + l2(l_focus1.si) * 1e-2

    if not USE_PENALTY:
        penalty = penalty * 0

    return l_out, penalty
def build_model(input_feas, classes, hidden_count, batchnorm=True):
    # Initializers and nonlinearities
    ini = lasagne.init.GlorotUniform()

    nonlin = lasagne.nonlinearities.rectify
    softmax = lasagne.nonlinearities.softmax

    lin = lasagne.nonlinearities.linear

    # Input Layer
    l_in = lasagne.layers.InputLayer(shape=(None, input_feas))

    # Dense layers
    l_dense1 = lasagne.layers.DenseLayer(l_in,
                                         num_units=hidden_count,
                                         nonlinearity=lin,
                                         W=ini,
                                         name="dense1",
                                         b=None)

    if batchnorm:
        # if batch norm is turned off, the weights get large
        l_bn = lasagne.layers.NonlinearityLayer(
            lasagne.layers.BatchNormLayer(l_dense1), nonlinearity=nonlin)

    else:

        l_bn = lasagne.layers.NonlinearityLayer(l_dense1, nonlinearity=nonlin)
    #l_dense2 = lasagne.layers.DenseLayer(l_dense1, num_units=4, nonlinearity=lasagne.nonlinearities.tanh, W=ini, name='dense2')

    #l_drop1 = lasagne.layers.dropout(l_bn, p=0.1)

    # Output Layer
    l_out = lasagne.layers.DenseLayer(l_bn,
                                      num_units=classes,
                                      nonlinearity=softmax,
                                      W=ini,
                                      name='output')

    penalty = (l2(l_dense1.W) * 1e-4) + (l1(l_dense1.W) *
                                         1e-6) + (l2(l_out.W) * 1e-3)
    if not USE_PENALTY:
        penalty = penalty * 0

    #penalty = penalty*0
    #penalty = (l2(l_dense1.W)*1e-30)#(l2(l_dense1.W)*1e-3)+(l1(l_dense1.W)*1e-6) +(l2(l_out.W)*1e-3)

    return l_out, penalty
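Both model builders above return the output layer together with a symbolic penalty expression. A hedged sketch of how that pair is typically consumed when assembling the training loss (the argument values and one-hot target shape are illustrative; the crossentropy-plus-penalty pattern mirrors the later examples):

import theano.tensor as T
import lasagne
from lasagne.objectives import categorical_crossentropy

l_out, penalty = build_model(input_feas=64, classes=10, hidden_count=32)
target = T.fmatrix('target')                       # one-hot targets
prediction = lasagne.layers.get_output(l_out)
# data term plus the regularization penalty returned by the builder
loss = categorical_crossentropy(prediction, target).mean() + penalty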
Example n. 5
    def get_loss(self, input=None, target=None, aggregation=None, **kwargs):
        """
        Get loss scalar expression

        :parameters:
            - input : (default `None`) an expression that results in the
                input data that is passed to the network
            - target : (default `None`) an expression that results in the
                desired output that the network is being trained to generate
                given the input
            - aggregation : None to use the value passed to the
                constructor or a value to override it
            - kwargs : additional keyword arguments passed to `input_layer`'s
                `get_output` method

        :returns:
            - output : loss expressions
        """
        network_output = lasagne.layers.get_output(self.input_layer, input, **kwargs)
        if target is None:
            target = self.target_var
        if aggregation not in self._valid_aggregation:
            raise ValueError('aggregation must be \'mean\', \'sum\', '
                             'or None, not {0}'.format(aggregation))
        if aggregation is None:
            aggregation = self.aggregation

        losses = self.loss_function(network_output, target) + self.l2_strength*l2(self.input_layer)

        if aggregation is None or aggregation == 'mean':
            return losses.mean()
        elif aggregation == 'sum':
            return losses.sum()
        else:
            raise RuntimeError('This should have been caught earlier')
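The expression returned by get_loss is a plain Theano scalar, so it can be compiled into a training function in the usual way. A minimal sketch, assuming `objective` is an instance of this class, `output_layer` its underlying network, and `X_batch`/`y_batch` symbolic input and target variables (all names here are illustrative):

import theano
import lasagne

loss_train = objective.get_loss(X_batch, target=y_batch)   # symbolic scalar loss
all_params = lasagne.layers.get_all_params(output_layer)
updates = lasagne.updates.nesterov_momentum(loss_train, all_params,
                                            learning_rate=0.01, momentum=0.9)
train_fn = theano.function([X_batch, y_batch], loss_train, updates=updates)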
Example n. 6
    def get_loss(self, input=None, target=None, deterministic=False, **kwargs):
        loss = super(RMSE, self).get_loss(input=input, target=target,
                                          deterministic=deterministic, **kwargs)
        loss = loss**0.5 + self.alpha * l2(self.input_layer)
        return loss
Example n. 7
def synth_compiled(input_layer, output_layer, I):
    which_class = 0
    LAM = 0.1

    theImage = theano.shared(I, name='theImage')
    params = [theImage]

    # index [0, which_class]: the zero index is needed to get a scalar gradient
    # (otherwise we would get one value per image); we only use a single image anyway
    classNeuron = get_output(output_layer, deterministic=True,
                             inputs=theImage)[0, which_class]

    # turn this into a loss that Adam minimizes
    regularized_score = -(classNeuron - LAM * l2(theImage))
    theGrad = T.grad(regularized_score, theImage)

    updates = lasagne.updates.adam([theGrad], params, learning_rate=0.1)
    synth_fn = theano.function([], [regularized_score], updates=updates)

    terr = []
    bar = progressbar.ProgressBar()
    for i in bar(range(1000)):
        terr.append(synth_fn())  # this also updates the params=images
        # print(terr[-1])

    Isynth = np.array(theImage.eval())
    figure()
    imshow(Isynth[0, 0])
    figure()
    imshow(Isynth[0, 1])
Example n. 8
    def get_cost_prior(self):
        prior_cost = 0
        params = self.get_params()
        for param in params:
            if param.name == 'W':
                prior_cost += regularization.l2(param).sum()

        return prior_cost
Example n. 10
    def get_loss(self, input=None, target=None, deterministic=False, **kwargs):
        loss = super(MyObjective, self).get_loss(input=input, target=target,
                                                 deterministic=deterministic, **kwargs)
        if not deterministic:
            return loss + self.magicnum * l2(self.input_layer)
        else:
            return loss
Example n. 12
    def get_loss(self, input=None, target=None, deterministic=False, **kwargs):
        loss = super(L2Regularization, self).get_loss(input=input, target=target,
                                                      deterministic=deterministic, **kwargs)
        if not deterministic:
            return loss + self.alpha * l2(self.input_layer)
        else:
            return loss
Example n. 13
    def get_loss(self, input=None, target=None, deterministic=False, **kwargs):
        loss = super(WeightDecayObjective, self).get_loss(input=input, target=target,
                                                          deterministic=deterministic, **kwargs)
        if not deterministic:
            return loss + self.weight_decay * regularization.l2(self.input_layer)
        else:
            return loss
Example n. 16
def reset():
    if any(np.isnan(scale.get_value()) for scale in scales):
        for scale in scales:
            scale.set_value(1.)
    for l in l_hiddens:
        l.b.set_value(Constant()(l.b.get_value().shape))
        l.W.set_value(Orthogonal()(l.W.get_value().shape))
    l_out.b.set_value(Constant()(l_out.b.get_value().shape))
    l_out.W.set_value(Orthogonal()(l_out.W.get_value().shape))
    for p in (p for u in (updates_ada, updates_other, updates_scal) for p in u
              if p not in get_all_params(l_out)):
        p.set_value(Constant()(p.get_value().shape))


chunky_l2 = apply_penalty(get_all_params(l_out, regularizable=True), l2) - l2(
    l_hiddens[0].W) + l2(l_hiddens[0].W / T.reshape(vscale, (206279, 1)))
chunky_l1 = apply_penalty(get_all_params(l_out, regularizable=True), l1) - l1(
    l_hiddens[0].W) + l1(l_hiddens[0].W / T.reshape(vscale, (206279, 1)))
simple_l2 = apply_penalty(get_all_params(l_out, regularizable=True), l2)
#l_out2 = DenseLayer(dropout(l_hiddens2[-1]), num_units=y.shape[1])
#l_out = lasagne.layers.NonlinearityLayer(lasagne.layers.ElemwiseSumLayer((l_out1,l_out2),.5), softmax)

#categorical_crossentropy(get_output(l_out)[train_indice])

target = T.fmatrix(name="target")
#f=theano.function([l_in.input_var],get_output(l_out),allow_input_downcast=True)
#f(X[0,:].toarray())

loss = categorical_crossentropy(get_output(l_out), target).mean()
# train_loss_smoo=categorical_crossentropy(get_output(l_out,deterministic=True)[train_indices,],target[train_indices,]).mean()
# valid_loss=categorical_crossentropy(get_output(l_out)[valid_indices,],target[valid_indices,]).mean()
def build_model(var_x, input_size_x, var_y, input_size_y, layer_sizes,
                weight_init=lasagne.init.GlorotUniform(), drop_prob=None, train_gamma_layer=None, **kwargs):
    layer_types = Params.LAYER_TYPES

    # Create x to y network
    model_x, hidden_x, weights_x, biases_x, prediction_y, hooks_x, dropouts_x = build_single_channel(
        var_x, input_size_x, input_size_y, layer_sizes, layer_types,
        weight_init, lasagne.init.Constant(0.), drop_prob, 'x',
        train_gamma_layer=train_gamma_layer)

    weights_y = [transpose_recursive(w) for w in reversed(weights_x)]
    bias_y = lasagne.init.Constant(0.)

    model_y, hidden_y, weights_y, biases_y, prediction_x, hooks_y, dropouts_y = build_single_channel(
        var_y, input_size_y, input_size_x, list(reversed(layer_sizes)),
        list(reversed(layer_types)), weights_y, bias_y, drop_prob, 'y',
        dropouts_x, train_gamma_layer)

    reversed_hidden_y = list(reversed(hidden_y))

    hooks = {}
    if "BatchNormalizationLayer:movingavg" in hooks_x:
        # Merge the two dictionaries
        hooks = hooks_x
        hooks["BatchNormalizationLayer:movingavg"].extend(hooks_y["BatchNormalizationLayer:movingavg"])
        # hooks["WhiteningLayer:movingavg"].extend(hooks_y["WhiteningLayer:movingavg"])

    loss_x = Params.LOSS_X * lasagne.objectives.squared_error(var_x, prediction_x).sum(axis=1).mean()
    loss_y = Params.LOSS_Y * lasagne.objectives.squared_error(var_y, prediction_y).sum(axis=1).mean()

    if len(hidden_x) % 2 == 0:
        middle_layer = int(len(hidden_x) / 2.) - 1
    else:
        middle_layer = int(floor(float(len(hidden_x)) / 2.))

    hooks_temp = {}

    layer_x = lasagne.layers.get_output(hidden_x[Params.TEST_LAYER], moving_avg_hooks=hooks_temp)
    layer_y = lasagne.layers.get_output(reversed_hidden_y[Params.TEST_LAYER], moving_avg_hooks=hooks_temp)

    loss_l2 = Params.L2_LOSS * lasagne.objectives.squared_error(layer_x, layer_y).sum(axis=1).mean()

    loss_weight_decay = 0

    shrinkage = Params.SHRINKAGE

    cov_x = T.dot(layer_x.T, layer_x) / T.cast(layer_x.shape[0], dtype=T.config.floatX)
    cov_y = T.dot(layer_y.T, layer_y) / T.cast(layer_x.shape[0], dtype=T.config.floatX)

    # mu_x = T.nlinalg.trace(cov_x) / layer_x.shape[1]
    # mu_y = T.nlinalg.trace(cov_y) / layer_y.shape[1]

    # cov_x = (1. - shrinkage) * cov_x + shrinkage * mu_x * T.identity_like(cov_x)
    # cov_y = (1. - shrinkage) * cov_y + shrinkage * mu_y * T.identity_like(cov_y)

    # loss_withen_x = Params.WITHEN_REG_X * T.mean(T.sum(abs(cov_x - T.identity_like(cov_x)), axis=0))
    # loss_withen_y = Params.WITHEN_REG_Y * T.mean(T.sum(abs(cov_y - T.identity_like(cov_y)), axis=0))

    loss_withen_x = Params.WITHEN_REG_X * (T.sqrt(T.sum(T.sum(cov_x ** 2))) - T.sqrt(T.sum(T.diag(cov_x) ** 2)))
    loss_withen_y = Params.WITHEN_REG_Y * (T.sqrt(T.sum(T.sum(cov_y ** 2))) - T.sqrt(T.sum(T.diag(cov_y) ** 2)))

    loss_weight_decay += lasagne.regularization.regularize_layer_params(model_x,
                                                                        penalty=l2) * Params.WEIGHT_DECAY
    loss_weight_decay += lasagne.regularization.regularize_layer_params(model_y,
                                                                        penalty=l2) * Params.WEIGHT_DECAY

    gamma_x = lasagne.layers.get_all_params(model_x, gamma=True)
    gamma_y = lasagne.layers.get_all_params(model_y, gamma=True)

    loss_gamma = T.constant(0)
    loss_gamma += sum(l2(gamma) for gamma in gamma_x) * Params.GAMMA_COEF
    loss_gamma += sum(l2(gamma) for gamma in gamma_y) * Params.GAMMA_COEF

    loss = loss_x + loss_y + loss_l2 + loss_weight_decay + loss_withen_x + loss_withen_y + loss_gamma

    output = {
        'loss_x': loss_x,
        'loss_y': loss_y,
        'loss_l2': loss_l2,
        'loss_weight_decay': loss_weight_decay,
        'loss_gamma': loss_gamma,
        'loss_withen_x': loss_withen_x,
        'loss_withen_y': loss_withen_y,
        'mean_x': T.mean(T.mean(layer_x, axis=0)),
        'mean_y': T.mean(T.mean(layer_y, axis=0)),
        'var_x': T.mean(T.var(layer_x, axis=0)),
        'var_y': T.mean(T.var(layer_y, axis=0)),
        'var_mean_x': T.var(T.mean(layer_x, axis=0)),
        'var_mean_y': T.var(T.mean(layer_y, axis=0))
    }

    return model_x, model_y, hidden_x, reversed_hidden_y, loss, output, hooks
Example n. 18
y_train = T.cast(theano.shared(np.load('/root/proj/MIT_dumped/y_train.npy')),'int32')
y_test = T.cast(theano.shared(np.load('/root/proj/MIT_dumped/y_test.npy')),'int32')
# load datasets
X_train_fc7 = theano.shared(np.load('/root/proj/MIT_dumped/X_train_fc7.npy').astype(theano.config.floatX))
X_test_fc7 = theano.shared(np.load('/root/proj/MIT_dumped/X_test_fc7.npy').astype(theano.config.floatX))

all_params = layers.get_all_params(output)

objective = objectives.Objective(output,loss_function=objectives.multinomial_nll)
loss_train = objective.get_loss([X_batch_one, X_batch_two], target=y_batch)


LEARNING_RATE =0.0122
MOMENTUM=0.9
REG = .0009
reg_loss = regularization.l2(output) * REG
total_loss = loss_train + reg_loss
upds = updates.nesterov_momentum(total_loss, all_params, LEARNING_RATE, MOMENTUM)
pred = T.argmax(
    output.get_output([X_batch_one, X_batch_two], deterministic=True), axis=1)
accuracy = T.mean(T.eq(pred, y_batch), dtype=theano.config.floatX)

print "begin compiling"
givens =    {X_batch_one: X_train_fc6[batch_index*batch_size:(batch_index+1)*batch_size],
            X_batch_two: X_train_fc7[batch_index*batch_size:(batch_index+1)*batch_size],
            y_batch: y_train[batch_index*batch_size:(batch_index+1)*batch_size]}
train = theano.function([batch_index], loss_train, updates=upds, givens=givens)
test = theano.function([], accuracy, givens={X_batch_one:X_test_fc6, X_batch_two:X_test_fc7, y_batch:y_test})
num_epochs = 1000
for epoch in range(num_epochs):
    print "epoch %s" % epoch
Example n. 19
def build_model(var_x,
                input_size_x,
                var_y,
                input_size_y,
                layer_sizes,
                weight_init=lasagne.init.GlorotUniform()):
    """
    Creates a bi-directional model containing two channels, one from var_x to the reconstruction of var_y
    and vice versa; the returned value also contains the composite loss term.
    The loss term is composed of:

    1. The reconstruction loss between X and X' and Y and Y' (X' and Y' are the output of each channel)

    2. The reconstruction loss of the OUTPUT_LAYER from both channels

    3. The covariance regularization, which aims to decorrelate each output internally

    4. The gamma regularization, equal to the sum of the squared norms of 1/gamma (the batch-normalization scale parameter)

    5. Weight decay (a compact math summary of the full composite loss is given right after this function)

    :param var_x: theano variable for the input x view
    :param input_size_x: size of x dimensionality
    :param var_y: theano variable for the input y view
    :param input_size_y: size of y dimensionality
    :param layer_sizes: array containing the sizes of hidden layers
    :param weight_init: initialization function for the weights
    :return:
    """
    layer_types = Params.LAYER_TYPES

    # Create x to y network
    model_x, hidden_x, weights_x, biases_x, prediction_y, hooks_x, dropouts_x = build_single_channel(
        var_x, input_size_x, input_size_y, layer_sizes, layer_types,
        weight_init, lasagne.init.Constant(0.), 'x')

    weights_y = [transpose_recursive(w) for w in reversed(weights_x)]
    bias_y = lasagne.init.Constant(0.)

    model_y, hidden_y, weights_y, biases_y, prediction_x, hooks_y, dropouts_y = build_single_channel(
        var_y, input_size_y, input_size_x, list(reversed(layer_sizes)),
        list(reversed(layer_types)), weights_y, bias_y, 'y', dropouts_x)

    reversed_hidden_y = list(reversed(hidden_y))

    hooks = {}
    if "BatchNormalizationLayer:movingavg" in hooks_x:
        # Merge the two dictionaries
        hooks = hooks_x
        hooks["BatchNormalizationLayer:movingavg"].extend(
            hooks_y["BatchNormalizationLayer:movingavg"])
        # hooks["WhiteningLayer:movingavg"].extend(hooks_y["WhiteningLayer:movingavg"])

    loss_x = Params.LOSS_X * lasagne.objectives.squared_error(
        var_x, prediction_x).sum(axis=1).mean()
    loss_y = Params.LOSS_Y * lasagne.objectives.squared_error(
        var_y, prediction_y).sum(axis=1).mean()

    hooks_temp = {}

    layer_x = lasagne.layers.get_output(hidden_x[Params.OUTPUT_LAYER],
                                        moving_avg_hooks=hooks_temp)
    layer_y = lasagne.layers.get_output(reversed_hidden_y[Params.OUTPUT_LAYER],
                                        moving_avg_hooks=hooks_temp)

    loss_l2 = Params.L2_LOSS * lasagne.objectives.squared_error(
        layer_x, layer_y).sum(axis=1).mean()

    loss_weight_decay = 0

    cov_x = T.dot(layer_x.T, layer_x) / T.cast(layer_x.shape[0],
                                               dtype=T.config.floatX)
    cov_y = T.dot(layer_y.T, layer_y) / T.cast(layer_x.shape[0],
                                               dtype=T.config.floatX)

    loss_withen_x = Params.WITHEN_REG_X * (T.sqrt(T.sum(T.sum(cov_x**2))) -
                                           T.sqrt(T.sum(T.diag(cov_x)**2)))
    loss_withen_y = Params.WITHEN_REG_Y * (T.sqrt(T.sum(T.sum(cov_y**2))) -
                                           T.sqrt(T.sum(T.diag(cov_y)**2)))

    loss_weight_decay += lasagne.regularization.regularize_layer_params(
        model_x, penalty=l2) * Params.WEIGHT_DECAY
    loss_weight_decay += lasagne.regularization.regularize_layer_params(
        model_y, penalty=l2) * Params.WEIGHT_DECAY

    gamma_x = lasagne.layers.get_all_params(model_x, gamma=True)
    gamma_y = lasagne.layers.get_all_params(model_y, gamma=True)

    loss_gamma = T.constant(0)
    loss_gamma += sum(l2(1 / gamma) for gamma in gamma_x) * Params.GAMMA_COEF
    loss_gamma += sum(l2(1 / gamma) for gamma in gamma_y) * Params.GAMMA_COEF

    loss = loss_x + loss_y + loss_l2 + loss_weight_decay + loss_withen_x + loss_withen_y + loss_gamma

    output = {
        'loss_x': loss_x,
        'loss_y': loss_y,
        'loss_l2': loss_l2,
        'loss_weight_decay': loss_weight_decay,
        'loss_gamma': loss_gamma,
        'loss_withen_x': loss_withen_x,
        'loss_withen_y': loss_withen_y,
        'mean_x': T.mean(T.mean(layer_x, axis=0)),
        'mean_y': T.mean(T.mean(layer_y, axis=0)),
        'var_x': T.mean(T.var(layer_x, axis=0)),
        'var_y': T.mean(T.var(layer_y, axis=0)),
        'var_mean_x': T.var(T.mean(layer_x, axis=0)),
        'var_mean_y': T.var(T.mean(layer_y, axis=0))
    }

    return model_x, model_y, hidden_x, reversed_hidden_y, loss, output, hooks
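For reference, the composite loss assembled above can be written compactly as follows (a summary of the code, with the lambda/beta coefficients standing in for the Params constants and C_x, C_y denoting the batch feature covariances h_x^T h_x / n and h_y^T h_y / n):

\[
\mathcal{L} = \lambda_x \,\overline{\lVert x - \hat{x} \rVert_2^2}
            + \lambda_y \,\overline{\lVert y - \hat{y} \rVert_2^2}
            + \lambda_{\ell_2} \,\overline{\lVert h_x - h_y \rVert_2^2}
            + \lambda_{wd} \sum_W \lVert W \rVert_2^2
            + \beta_x \big(\lVert C_x \rVert_F - \lVert \mathrm{diag}(C_x) \rVert_2\big)
            + \beta_y \big(\lVert C_y \rVert_F - \lVert \mathrm{diag}(C_y) \rVert_2\big)
            + \gamma_c \sum_k \lVert 1/\gamma_k \rVert_2^2
\]

where the overlines denote the batch mean of the per-sample squared norms.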
Example n. 20
def synthesize_image(input_layer,
                     output_layer,
                     inputshape,
                     which_class,
                     gradient_steps,
                     gradient_stepsize,
                     LAM,
                     chopNonlin=True,
                     I0=None):
    """
    does gradient ascent in image space to maximize a certain class score, hence producing an image
    that maximizes that class
    :param input_layer:
    :param output_layer:
    :param inputshape:
    :param gradient_steps:
    :param gradient_stepsize:
    :param chopNonlin: maximize the class score before (True) or after (False) the final nonlinearity; True -> maximize the unnormalized score
    :param I0: optionally, an image from which to start the optimization. Could be a natural image in which we want to enhance the features of the class.
           If None, a random initialization will be used
    :return:
    """

    assert inputshape[0] == 1
    input_var = input_layer.input_var

    if chopNonlin:
        before_non = _get_output_before_nonlinearity(output_layer,
                                                     deterministic=True)
        classNeuron = before_non[0, which_class]
    else:
        # index [0, which_class]: the zero index is needed to get a scalar gradient
        # (otherwise we would get one value per image); we only use a single image anyway
        classNeuron = get_output(output_layer,
                                 deterministic=True)[0, which_class]

    # mind the SIGN: we MAXIMIZE the class score, hence l2 must be subtracted
    regularized_score = classNeuron - LAM * l2(input_var)
    theGrad = T.grad(regularized_score, input_var)

    gradient_fn = theano.function([input_var], theGrad)
    score_fn = theano.function([input_var], regularized_score)

    # starting point of gradient ascend:
    if I0 is None:
        # I = np.zeros(inputshape,dtype='float32')
        I = np.random.normal(0, 1, inputshape).astype('float32')
    else:
        I = np.copy(I0)  # otherwise we would modify the original image as a side effect
        assert I.shape == inputshape


    # Alternative (commented out): optimize with Adam instead of plain gradient ascent
    #     loss = -regularized_score  # we want to maximize the score, hence the minus sign
    params = [theano.shared(I, name='theImage')]
    #     params = [theano.shared(input_var)]
    #     updates = lasagne.updates.adam(loss, params)  # **optimizer_params
    #     train_fn = theano.function([input_var, target_var], [loss, train_acc], updates=updates)

    I_progress = []
    score_progress = []
    bar = progressbar.ProgressBar()
    for i in bar(range(gradient_steps)):
        if i % 10 == 0:
            I_progress.append(I.copy())
        gr = gradient_fn(I)
        I += gr * gradient_stepsize
        the_score = score_fn(I)
        score_progress.append(the_score)
        # print("%d\t%.3f" % (i, the_score))

    plt.figure()
    plt.plot(score_progress)
    return I_progress, score_progress
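A hedged usage sketch (the layer handles, input shape, and hyper-parameters below are illustrative assumptions, not values from the original script):

# assumes l_in / l_out are the InputLayer and output layer of an already trained network
I_progress, score_progress = synthesize_image(l_in, l_out,
                                              inputshape=(1, 1, 28, 28),
                                              which_class=3,
                                              gradient_steps=500,
                                              gradient_stepsize=0.1,
                                              LAM=0.01)
Isynth = I_progress[-1]   # the last stored snapshot of the optimized image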
Example n. 21
for i in xrange(0):
    l_hiddens.append(DenseLayer(dropout(l_hiddens[-1]), num_units=100, nonlinearity=rectify))
l_out = DenseLayer(dropout(l_hiddens[-1]), num_units=y.shape[1], nonlinearity=softmax, W=Orthogonal())

def reset():
    if any(np.isnan(scale.get_value()) for scale in scales):
        for scale in scales:
            scale.set_value(1.)
    for l in l_hiddens:
        l.b.set_value(Constant()(l.b.get_value().shape))
        l.W.set_value(Orthogonal()(l.W.get_value().shape))
    l_out.b.set_value(Constant()(l_out.b.get_value().shape))
    l_out.W.set_value(Orthogonal()(l_out.W.get_value().shape))
    for p in (p for u in (updates_ada,updates_other,updates_scal) for p in u if p not in get_all_params(l_out)):
        p.set_value(Constant()(p.get_value().shape))
chunky_l2 = apply_penalty(get_all_params(l_out,regularizable=True),l2)-l2(l_hiddens[0].W)+l2(l_hiddens[0].W/T.reshape(vscale,(206279,1)))
chunky_l1 = apply_penalty(get_all_params(l_out,regularizable=True),l1)-l1(l_hiddens[0].W)+l1(l_hiddens[0].W/T.reshape(vscale,(206279,1)))
simple_l2 = apply_penalty(get_all_params(l_out,regularizable=True),l2)
#l_out2 = DenseLayer(dropout(l_hiddens2[-1]), num_units=y.shape[1])
#l_out = lasagne.layers.NonlinearityLayer(lasagne.layers.ElemwiseSumLayer((l_out1,l_out2),.5), softmax)

#categorical_crossentropy(get_output(l_out)[train_indice])

target=T.fmatrix(name="target")
#f=theano.function([l_in.input_var],get_output(l_out),allow_input_downcast=True)
#f(X[0,:].toarray())

loss=categorical_crossentropy(get_output(l_out),target).mean()
# train_loss_smoo=categorical_crossentropy(get_output(l_out,deterministic=True)[train_indices,],target[train_indices,]).mean()
# valid_loss=categorical_crossentropy(get_output(l_out)[valid_indices,],target[valid_indices,]).mean()
# valid_loss_smoo=categorical_crossentropy(get_output(l_out,deterministic=True)[valid_indices,],target[valid_indices,]).mean()