Example #1
def get_model(input_var, target_var, multiply_var):

    # input layer with unspecified batch size
    layer     = InputLayer(shape=(None, 12, 64, 64), input_var=input_var) #InputLayer(shape=(None, 1, 30, 64, 64), input_var=input_var)
    layer     = DimshuffleLayer(layer, (0, 'x', 1, 2, 3))

    # Z-score?

    # Convolution then batchNormalisation then activation layer, then zero padding layer followed by a dropout layer
    layer         = batch_norm(Conv3DDNNLayer(incoming=layer, num_filters=16, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=rectify))
    layer         = batch_norm(Conv3DDNNLayer(incoming=layer, num_filters=16, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=rectify))
    layer         = Conv3DDNNLayer(incoming=layer, num_filters=1, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=sigmoid)
    layer_prediction  = layer

    # Loss
    prediction           = get_output(layer_prediction)
    loss                 = binary_crossentropy(prediction[:,0,:,:,:], target_var).mean()

    #Updates : Stochastic Gradient Descent (SGD) with Nesterov momentum
    params               = get_all_params(layer_prediction, trainable=True)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network, disabling dropout layers.
    test_prediction      = get_output(layer_prediction, deterministic=True)
    test_loss            = binary_crossentropy(test_prediction[:,0,:,:,:], target_var).mean()

    return test_prediction, prediction, loss, params
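Picking up on the "SGD with Nesterov momentum" comment above, here is a minimal sketch (not part of the original example) of how the returned loss and params might be compiled into a training function. It assumes the example's own imports plus a cuDNN-enabled Theano install, since Conv3DDNNLayer requires it; the learning rate and momentum values are placeholders.

import theano
import theano.tensor as T
from lasagne.updates import nesterov_momentum

input_var = T.tensor4('input')    # (batch, 12, 64, 64)
target_var = T.tensor4('target')  # same spatial shape as the prediction
multiply_var = T.matrix('multiplier')

test_prediction, prediction, loss, params = get_model(input_var, target_var, multiply_var)
updates = nesterov_momentum(loss, params, learning_rate=0.01, momentum=0.9)
train_fn = theano.function([input_var, target_var], loss, updates=updates)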
Example #2
def get_model(input_images, input_position, input_mult, target_var):

    # number of SAX and distance between SAX slices
    #indexes = []
    #for i in range(input_position.shape[0]):
    #    indexes.append(numpy.where(input_position[i][:,0] == 0.)[0][0])
    
    # input layer with unspecified batch size
    layer     = InputLayer(shape=(None, 22, 30, 64, 64), input_var=input_images) #InputLayer(shape=(None, 1, 30, 64, 64), input_var=input_var)
    
    # Z-score?

    # Convolution then batchNormalisation then activation layer, then zero padding layer followed by a dropout layer
    layer         = batch_norm(Conv3DDNNLayer(incoming=layer, num_filters=16, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=rectify))
    shortcut      = layer
    layer         = batch_norm(Conv3DDNNLayer(incoming=layer, num_filters=16, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=rectify))
    layer         = batch_norm(Conv3DDNNLayer(incoming=layer, num_filters=16, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=rectify))
    layer         = ElemwiseSumLayer([layer, shortcut])
    shortcut      = layer
    layer         = batch_norm(Conv3DDNNLayer(incoming=layer, num_filters=16, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=rectify))
    layer         = batch_norm(Conv3DDNNLayer(incoming=layer, num_filters=16, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=rectify))
    layer         = ElemwiseSumLayer([layer, shortcut])
    shortcut      = layer
    layer         = batch_norm(Conv3DDNNLayer(incoming=layer, num_filters=16, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=rectify))
    layer         = batch_norm(Conv3DDNNLayer(incoming=layer, num_filters=16, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=rectify))
    layer         = ElemwiseSumLayer([layer, shortcut])
    shortcut      = layer
    layer         = batch_norm(Conv3DDNNLayer(incoming=layer, num_filters=16, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=rectify))
    layer         = batch_norm(Conv3DDNNLayer(incoming=layer, num_filters=16, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=rectify))
    layer         = ElemwiseSumLayer([layer, shortcut])
    shortcut      = layer
    layer         = batch_norm(Conv3DDNNLayer(incoming=layer, num_filters=16, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=rectify))
    layer         = batch_norm(Conv3DDNNLayer(incoming=layer, num_filters=16, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=rectify))
    layer         = ElemwiseSumLayer([layer, shortcut])
    shortcut      = layer
    layer         = batch_norm(Conv3DDNNLayer(incoming=layer, num_filters=16, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=rectify))
    layer         = batch_norm(Conv3DDNNLayer(incoming=layer, num_filters=16, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=rectify))
    layer         = ElemwiseSumLayer([layer, shortcut])
    layer         = batch_norm(Conv3DDNNLayer(incoming=layer, num_filters=16, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=rectify))
    layer         = Conv3DDNNLayer(incoming=layer, num_filters=22, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=sigmoid)

    layer_max     = ExpressionLayer(layer, lambda X: X.max(1), output_shape='auto')
    layer_min     = ExpressionLayer(layer, lambda X: X.min(1), output_shape='auto')
    
    layer_prediction = layer
    # image prediction
    prediction           = get_output(layer_prediction)
        
    loss                 = binary_crossentropy(prediction, target_var).mean()

    #Updates : Stochastic Gradient Descent (SGD) with Nesterov momentum
    params               = get_all_params(layer_prediction, trainable=True)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network, disabling dropout layers.
    test_prediction      = get_output(layer_prediction, deterministic=True)
    test_loss            = binary_crossentropy(test_prediction, target_var).mean()

    return test_prediction, prediction, loss, params
Example #3
def get_model(input_var, target_var, multiply_var):

    # input layer with unspecified batch size
    layer = InputLayer(
        shape=(None, 12, 64, 64), input_var=input_var
    )  #InputLayer(shape=(None, 1, 30, 64, 64), input_var=input_var)
    layer = DimshuffleLayer(layer, (0, 'x', 1, 2, 3))

    # Z-score?

    # Convolution then batchNormalisation then activation layer, then zero padding layer followed by a dropout layer
    layer = batch_norm(
        Conv3DDNNLayer(incoming=layer,
                       num_filters=16,
                       filter_size=(3, 3, 3),
                       stride=(1, 1, 1),
                       pad='same',
                       nonlinearity=rectify))
    layer = batch_norm(
        Conv3DDNNLayer(incoming=layer,
                       num_filters=16,
                       filter_size=(3, 3, 3),
                       stride=(1, 1, 1),
                       pad='same',
                       nonlinearity=rectify))
    layer = batch_norm(
        Conv3DDNNLayer(incoming=layer,
                       num_filters=16,
                       filter_size=(3, 3, 3),
                       stride=(1, 1, 1),
                       pad='same',
                       nonlinearity=rectify))
    layer = Conv3DDNNLayer(incoming=layer,
                           num_filters=1,
                           filter_size=(3, 3, 3),
                           stride=(1, 1, 1),
                           pad='same',
                           nonlinearity=sigmoid)
    layer_prediction = layer

    # Loss
    prediction = get_output(layer_prediction)
    loss = binary_crossentropy(prediction[:, 0, :, :, :], target_var).mean()

    #Updates : Stochastic Gradient Descent (SGD) with Nesterov momentum
    params = get_all_params(layer_prediction, trainable=True)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network, disabling dropout layers.
    test_prediction = get_output(layer_prediction, deterministic=True)
    test_loss = binary_crossentropy(test_prediction[:, 0, :, :, :],
                                    target_var).mean()

    return test_prediction, prediction, loss, params
Example #4
def calc_loss_multi(prediction, targets):

    #we need to clip predictions when calculating the log-loss
    prediction = T.clip(prediction, 0.0000001, 0.9999999)

    #binary crossentropy is the best choice for a multi-class sigmoid output
    loss = T.mean(objectives.binary_crossentropy(prediction, targets))

    return loss
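A small numeric check (reusing calc_loss_multi and the example's Theano/Lasagne imports) of why the clip matters: hard 0/1 predictions would otherwise hit log(0) in the cross-entropy.

import numpy as np
import theano
import theano.tensor as T

p = T.matrix('p')
t = T.matrix('t')
loss_fn = theano.function([p, t], calc_loss_multi(p, t))

preds = np.array([[0.0, 1.0, 0.3]], dtype=theano.config.floatX)  # exact 0/1 would give log(0) without clipping
targs = np.array([[0.0, 1.0, 1.0]], dtype=theano.config.floatX)
print(loss_fn(preds, targs))  # finite loss thanks to T.clip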
Example #5
def compile_model(input_var, target_var, net):

    prediction = layers.get_output(net['out'])
    loss = binary_crossentropy(prediction, target_var)
    loss = lasagne.objectives.aggregate(loss)

    params = layers.get_all_params(net['out'], trainable=True)
    updates = lasagne.updates.adam(loss, params, learning_rate=1e-5)

    test_prediction = layers.get_output(net['out'], deterministic=True)
    test_loss = binary_crossentropy(test_prediction, target_var)
    test_loss = lasagne.objectives.aggregate(test_loss)

    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    val_fn = theano.function([input_var, target_var], test_loss)
    prob_fn = theano.function([input_var], test_prediction)

    return train_fn, val_fn, prob_fn
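A minimal, self-contained usage sketch: the two-layer net dict here is hypothetical, just enough to exercise compile_model and show the shapes the three compiled functions expect.

import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne import layers
from lasagne.objectives import binary_crossentropy

input_var = T.matrix('X')
target_var = T.matrix('y')

net = {}
net['in'] = layers.InputLayer((None, 16), input_var=input_var)
net['out'] = layers.DenseLayer(net['in'], num_units=1,
                               nonlinearity=lasagne.nonlinearities.sigmoid)

train_fn, val_fn, prob_fn = compile_model(input_var, target_var, net)

Xb = np.random.rand(4, 16).astype(theano.config.floatX)
yb = np.random.randint(0, 2, (4, 1)).astype(theano.config.floatX)
print(train_fn(Xb, yb), val_fn(Xb, yb), prob_fn(Xb).shape)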
Example #6
def test_binary_crossentropy(colvect):
    # symbolic version
    from lasagne.objectives import binary_crossentropy
    if not colvect:
        p, t = theano.tensor.matrices('pt')
        c = binary_crossentropy(p, t)
    else:
        # check that for convenience, comparing a prediction column vector
        # against a 1D target vector does not lead to broadcasting
        p, t = theano.tensor.vectors('pt')
        c = binary_crossentropy(p.dimshuffle(0, 'x'), t)[:, 0]
    # numeric version
    floatX = theano.config.floatX
    shape = (10, 20) if not colvect else (10, )
    predictions = np.random.rand(*shape).astype(floatX)
    targets = np.random.rand(*shape).astype(floatX)
    crossent = (-targets * np.log(predictions) -
                (1 - targets) * np.log(1 - predictions))
    # compare
    assert np.allclose(crossent, c.eval({p: predictions, t: targets}))
Example #7
def build_loss(pred_var, target_var, resp_dur, t_ind):
    if t_ind == 0 or t_ind == 1 or t_ind == 4:
        loss = T.mean(
            T.mod(
                T.abs_(pred_var[:, -resp_dur:, :] -
                       target_var[:, -resp_dur:, :]), np.pi))
    elif t_ind == 2 or t_ind == 6 or t_ind == 8:
        loss = T.mean(
            binary_crossentropy(pred_var[:, -resp_dur:, -1],
                                target_var[:, -resp_dur:, -1]))
    return loss
Example #8
def binary_crossentropy_void(y_pred, y_true, y_mask):
    # Flatten y_true
    y_true = T.reshape(y_true, y_pred.shape)
    y_mask = T.reshape(y_mask, y_pred.shape)

    eps = 1e-12
    y_pred = y_pred.clip(0 + eps, 1 - eps)

    error = y_mask * binary_crossentropy(y_pred, y_true)

    return T.mean(error)
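A usage sketch (reusing binary_crossentropy_void and the example's imports; the mask is assumed to mark void pixels with 0) showing that masked pixels are dropped from the loss.

import numpy as np
import theano
import theano.tensor as T

p, t, m = T.matrix('p'), T.matrix('t'), T.matrix('m')
masked_loss = theano.function([p, t, m], binary_crossentropy_void(p, t, m))

preds = np.array([[0.8, 0.1]], dtype=theano.config.floatX)
targs = np.array([[1.0, 1.0]], dtype=theano.config.floatX)
mask  = np.array([[1.0, 0.0]], dtype=theano.config.floatX)  # second pixel is "void"
print(masked_loss(preds, targs, mask))  # only the first pixel contributes; note the mean still divides by all elements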
Example #9
def test_binary_crossentropy(colvect):
    # symbolic version
    from lasagne.objectives import binary_crossentropy
    if not colvect:
        p, t = theano.tensor.matrices('pt')
        c = binary_crossentropy(p, t)
    else:
        # check that for convenience, comparing a prediction column vector
        # against a 1D target vector does not lead to broadcasting
        p, t = theano.tensor.vectors('pt')
        c = binary_crossentropy(p.dimshuffle(0, 'x'), t)[:, 0]
    # numeric version
    floatX = theano.config.floatX
    shape = (10, 20) if not colvect else (10,)
    predictions = np.random.rand(*shape).astype(floatX)
    targets = np.random.rand(*shape).astype(floatX)
    crossent = (- targets * np.log(predictions) -
                (1-targets) * np.log(1-predictions))
    # compare
    assert np.allclose(crossent, c.eval({p: predictions, t: targets}))
Example #10
def test_binary_crossentropy():
    # symbolic version
    from lasagne.objectives import binary_crossentropy
    p, t = theano.tensor.matrices('pt')
    c = binary_crossentropy(p, t)
    # numeric version
    floatX = theano.config.floatX
    predictions = np.random.rand(10, 20).astype(floatX)
    targets = np.random.rand(10, 20).astype(floatX)
    crossent = (- targets * np.log(predictions) -
                (1-targets) * np.log(1-predictions))
    # compare
    assert np.allclose(crossent, c.eval({p: predictions, t: targets}))
Example #11
def test_binary_crossentropy():
    # symbolic version
    from lasagne.objectives import binary_crossentropy
    p, t = theano.tensor.matrices('pt')
    c = binary_crossentropy(p, t)
    # numeric version
    floatX = theano.config.floatX
    predictions = np.random.rand(10, 20).astype(floatX)
    targets = np.random.rand(10, 20).astype(floatX)
    crossent = (- targets * np.log(predictions) -
                (1-targets) * np.log(1-predictions))
    # compare
    assert np.allclose(crossent, c.eval({p: predictions, t: targets}))
Example #12
def run(get_model, model_name):
	train_stream = ServerDataStream(('cases', 'image_features', 'image_targets', 'multiplier'), False, hwm=10)
	valid_stream = ServerDataStream(('cases', 'image_features', 'image_targets', 'multiplier'), False, hwm=10, port=5558)

	input_var  = tensor.tensor4('image_features')
	target_var = tensor.tensor4('image_targets')
	multiply_var = tensor.matrix('multiplier')
	multiply_var = T.addbroadcast(multiply_var, 1)

	test_prediction, prediction, params = get_model(input_var, target_var, multiply_var)

	loss = binary_crossentropy(prediction, target_var).mean()


	loss.name = 'loss'

	valid_error = T.neq((test_prediction>0.5)*1., target_var).mean()
	valid_error.name = 'error'

	scale = Scale(0.1)
	algorithm = GradientDescent(
		cost=loss,
		parameters=params,
		step_rule=scale,
		#step_rule=Adam(),
		on_unused_sources='ignore'
	)

	host_plot = 'http://localhost:5006'

	extensions = [
		Timing(),
		TrainingDataMonitoring([loss], after_epoch=True),
		DataStreamMonitoring(variables=[loss, valid_error], data_stream=valid_stream, prefix="valid"),
		Plot('%s %s %s' % (model_name, datetime.date.today(), time.strftime('%H:%M')), channels=[['loss','valid_loss'],['valid_error']], after_epoch=True, server_url=host_plot),
		Printing(),
		# Checkpoint('train'),
		FinishAfter(after_n_epochs=10)
	]

	main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm,
	                     extensions=extensions)
	cg = ComputationGraph(test_prediction)
	while True:
		main_loop.run()
		scale.learning_rate.set_value(numpy.float32(scale.learning_rate.get_value()*0.7))
		numpy.savez('best_weights.npz', [param.get_value() for param in cg.shared_variables])
Example #13
def similarity_iter(output_layer, match_layer, update_params, match_layer_w=0):
    X1 = T.tensor4()
    X2 = T.tensor4()
    y = T.ivector()

    # find the input layers
    # TODO this better
    all_layers = ll.get_all_layers(match_layer)
    # make image of all layers
    imwrite_architecture(all_layers, './layer_rep.png')

    input_1 = filter(lambda x: x.name == 'input1', all_layers)[0]
    input_2 = filter(lambda x: x.name == 'input2', all_layers)[0]

    descriptors_train, match_prob_train = ll.get_output([output_layer, match_layer], {input_1: X1, input_2: X2})
    descriptors_eval, match_prob_eval = ll.get_output([output_layer, match_layer], {input_1: X1, input_2: X2}, deterministic=True)
    #descriptor_shape = ll.get_output_shape(output_layer, {input_1: X1, input_2: X2})
    #print("Network output shape: %r" % (descriptor_shape,))
    # distance minimization
    distance = lambda x: (x[:,0,:] - x[:,1,:] + 1e-7).norm(2, axis=1)
    #distance_eval = (descriptors_eval[:,0,:] - descriptors_eval[:,1,:] + 1e-7).norm(2, axis=1)
    # 9/21 squaring the loss seems to prevent it from getting to 0.5 really quickly (i.e. w/in 3 epochs)
    # let's see if it will learn something good
    margin = 1
    decay = 0
    reg = regularize_network_params(match_layer, l2) * decay
    loss = lambda x, z: ((1-match_layer_w)*T.mean(y*(distance(x)) + (1 - y)*(T.maximum(0, margin - distance(x))))/2 # contrastive loss
            + match_layer_w*T.mean(binary_crossentropy(z.T + 1e-7,y))) # matching loss
    loss_reg = lambda x, z: (loss(x,z) + reg)
    # this loss doesn't work since it just pushes all the descriptors near each other and then predicts 0 all the time for the matching
    #jason_loss = lambda x, z: T.mean(distance(x)*y + (1-y)*binary_crossentropy(z.T + 1e-7,y))
    #loss_eval = T.mean(y*(distance_eval**2) + (1 - y)*(T.maximum(0, 1 - distance_eval)**2))
    all_params = ll.get_all_params(match_layer) # unsure how I would do this if there were truly two trainable branches...
    loss_train = loss_reg(descriptors_train, match_prob_train)
    loss_train.name = 'combined_loss' # for the names
    grads = T.grad(loss_train, all_params, add_names=True)
    #updates = adam(grads, all_params, **update_params)
    updates = nesterov_momentum(grads, all_params, **update_params)

    train_iter = theano.function([X1, X2, y], [loss_train, loss(descriptors_train, match_prob_train)] + grads, updates=updates)
    #theano.printing.pydotprint(loss, outfile='./loss_graph.png',var_with_name_simple=True)
    valid_iter = theano.function([X1, X2, y], loss(descriptors_eval, match_prob_eval))

    return {'train':train_iter, 'valid':valid_iter, 'gradnames':[g.name for g in grads]}
Example #14
    def get_cost_L(self, inputs):
        # make it clear which get_output_for is used
        print('getting_cost_L')

        # inputs must obey the order.
        image_input, label_input = inputs
        encoder = self.encoder.get_output_for(self.concat_xy.get_output_for([image_input, label_input]))
        mu_z = self.encoder_mu.get_output_for(encoder)
        log_var_z = self.encoder_log_var.get_output_for(encoder)
        z = self.sampler.get_output_for([mu_z, log_var_z])

        decoder = self.decoder.get_output_for(self.concat_yz.get_output_for([label_input, z]))
        reconstruct = self.decoder_x.get_output_for(decoder)

        l_x = objectives.binary_crossentropy(reconstruct, image_input).sum(1)
        l_z = ((mu_z ** 2 + T.exp(log_var_z) - 1 - log_var_z) * 0.5).sum(1)

        cost_L = l_x + l_z
        return cost_L
Example #15
    def get_cost_L(self, inputs):
        # make it clear which get_output_for is used
        print('getting_cost_L')

        # inputs must obey the order.
        image_input, label_input = inputs
        encoder = self.encoder.get_output_for(
            self.concat_xy.get_output_for([image_input, label_input]))
        mu_z = self.encoder_mu.get_output_for(encoder)
        log_var_z = self.encoder_log_var.get_output_for(encoder)
        z = self.sampler.get_output_for([mu_z, log_var_z])

        decoder = self.decoder.get_output_for(
            self.concat_yz.get_output_for([label_input, z]))
        reconstruct = self.decoder_x.get_output_for(decoder)

        l_x = objectives.binary_crossentropy(reconstruct, image_input).sum(1)
        l_z = ((mu_z**2 + T.exp(log_var_z) - 1 - log_var_z) * 0.5).sum(1)

        cost_L = l_x + l_z
        return cost_L
Example #16
    def build_functions(self, deterministic=False):
        l_out = self.layer_output
        x_sym = T.lmatrix()
        y_sym = T.lvector()

        output = lasagne.layers.get_output(l_out,
                                           x_sym,
                                           deterministic=deterministic)
        pred = output.argmax(-1)

        #loss = objectives.categorical_crossentropy( output, y_sym ).mean()
        loss = objectives.binary_crossentropy(output, y_sym).mean()
        params = lasagne.layers.get_all_params(l_out)
        acc = T.mean(T.eq(output, y_sym))

        #grad = T.grad( loss, params )
        #updates = lasagne.updates.sgd( grad, params, learning_rate=0.01 )
        #updates = lasagne.updates.adam()
        updates = lasagne.updates.adam(loss, params)

        f_train = theano.function([x_sym, y_sym], [loss, acc], updates=updates)
        f_train_pred = theano.function([x_sym, y_sym], [loss, acc, output],
                                       updates=updates)
        f_val = theano.function([x_sym, y_sym], [loss, acc])

        f_predict = theano.function([x_sym], pred)
        f_test_predict = theano.function([x_sym], output)

        self.functions = Chibi_atlas({
            'train': f_train,
            'train_predict': f_train_pred,
            'val': f_val,
            'predict': f_predict,
            'test_predict': f_test_predict,
        })
        return self.functions
Example #17
    def __init__(
            self,
            input_size,
            layers_config=[(64, 8, 2, 'valid'), (128, 3, 2, 'same')],
            code_layer_size=2,
            batch_norm=True,
            nonlinearity=rectify
    ):
        """"This class is made to support a variable number of layers.

        :type input_size: tuple of int
        :param input_size: Shape of the input i.e (None, 1, 28, 28) Means that it will have a defined at runtime
                           amount of examples with one channel and of size 28 x 28.

        :type layers_config: list of tuples of ints
        :param layers_config: Configuration of the net, e.g. [(64, 5, 2, 'valid'), (32, 3, None, 'same')]
                              means the first layer will output 64 feature maps, use filters of size 5
                              and be followed by a max-pooling layer with a pool size of 2.
                              The second layer will output 32 feature maps, use filters of size 3 and
                              will not be followed by a pooling layer. The 4th param is the padding. see:
                              http://lasagne.readthedocs.org/en/latest/modules/layers/conv.html#lasagne.layers.Conv2DLayer

        :type code_layer_size: int
        :param code_layer_size: Determine the size of the code layer.

        :type batch_norm: bool
        :param batch_norm: If True, batch-normalization will be used. Otherwise, bias will be used.

        :type nonlinearity: Lasagne.nonlinearities
        :param nonlinearity: Define the activation function to use
        """

        def bias_plus_nonlinearity(l, bias, nl):
            l = bias(l)
            l = NonlinearityLayer(l, nonlinearity=nl)
            return l

        self.x = T.tensor4('inputs')  # the data is presented as rasterized images

        self.normalization_layer = BatchNormLayer if batch_norm else BiasLayer
        self.nonlinearity = nonlinearity
        self.code_layer_size = code_layer_size
        self.network_config_string = ""

        l = InputLayer(input_var=self.x, shape=input_size)
        invertible_layers = []  # Used to keep track of layers that will be inverted in the decoding phase
        """" Encoding """
        for layer in layers_config:
            l = Conv2DLayer(l, num_filters=layer[0], filter_size=layer[1], nonlinearity=None, b=None,
                            W=lasagne.init.GlorotUniform(), pad=layer[3])
            
            invertible_layers.append(l)
            self.network_config_string += "(" + str(layer[0]) + ")" + str(layer[1]) + "c"
            print(l.output_shape)
            l = bias_plus_nonlinearity(l, self.normalization_layer, self.nonlinearity)
            if layer[2] is not None:  # then we add a pooling layer
                l = MaxPool2DLayer(l, layer[2])
                invertible_layers.append(l)
                self.network_config_string += "-" + str(layer[2]) + "p"
                print(l.output_shape)
            self.network_config_string += "-"

        # l = DenseLayer(l, num_units=l.output_shape[1], nonlinearity=None, b=None)
        # invertible_layers.append(l)
        # self.network_config_string += str(l.output_shape[1]) + "fc"
        # print(l.output_shape)
        l = DenseLayer(l, num_units=self.code_layer_size, nonlinearity=None, b=None)
        invertible_layers.append(l)
        self.network_config_string += str(self.code_layer_size) + "fc"
        print(l.output_shape)
        # Inspired by Hinton's (2006) paper, the code layer is linear, which allows it to retain more information,
        # especially with code layers of small dimension
        l = bias_plus_nonlinearity(l, self.normalization_layer, linear)
        self.code_layer = get_output(l)

        """ Decoding """
        # l = InverseLayer(l, invertible_layers.pop())  # Inverses the fully connected layer
        # print(l.output_shape)
        # l = bias_plus_nonlinearity(l, self.normalization_layer, self.nonlinearity)
        l = InverseLayer(l, invertible_layers.pop())  # Inverses the fully connected layer
        print(l.output_shape)
        l = bias_plus_nonlinearity(l, self.normalization_layer, self.nonlinearity)
        for i, layer in enumerate(layers_config[::-1]):
            if layer[2] is not None:
                l = InverseLayer(l, invertible_layers.pop())  # Inverse a max-pooling layer
                print(l.output_shape)
            l = InverseLayer(l, invertible_layers.pop())  # Inverse the convolutional layer
            print(l.output_shape)
            # last layer is a sigmoid because it's a reconstruction and pixel values are between 0 and 1
            nl = sigmoid if i == len(layers_config) - 1 else self.nonlinearity
            l = bias_plus_nonlinearity(l, self.normalization_layer, nl)  # its own bias_nonlinearity

        self.network = l
        self.reconstruction = get_output(self.network)
        self.params = get_all_params(self.network, trainable=True)
        # Sum over axes 1-2-3 as they represent the image (channels, height, width). This gives the binary
        # cross-entropy for every image of the mini-batch, of which we then take the mean.
        self.fine_tune_cost = T.sum(binary_crossentropy(self.reconstruction, self.x), axis=(1, 2, 3)).mean()
        self.test_cost = T.sum(binary_crossentropy(get_output(self.network), self.x), axis=(1,2,3)).mean()
Example #18
def loss(x, t):
    return aggregate(binary_crossentropy(x, t))
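A quick check (not from the original source) that aggregate with its default mode='mean' makes this loss equivalent to binary_crossentropy(x, t).mean().

import numpy as np
import theano
import theano.tensor as T
from lasagne.objectives import binary_crossentropy, aggregate

x = T.matrix('x')
t = T.matrix('t')
f = theano.function([x, t], [aggregate(binary_crossentropy(x, t)),
                             binary_crossentropy(x, t).mean()])

preds = np.random.rand(4, 3).astype(theano.config.floatX)
targs = np.random.randint(0, 2, (4, 3)).astype(theano.config.floatX)
a, b = f(preds, targs)
assert np.allclose(a, b)  # aggregate defaults to the mean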
Example #19
def event_span_classifier(args, input_var, target_var, wordEmbeddings, seqlen, num_feats):

    print("Building model with 1D Convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    kw = 2
    num_filters = seqlen-kw+1
    stride = 1 

    #important context words as channels
 
    #CNN_sentence config
    filter_size=wordDim
    pool_size=seqlen-filter_size+1

    input = InputLayer((None, seqlen, num_feats),input_var=input_var)
    batchsize, _, _ = input.input_var.shape
    emb = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    #emb.params[emb.W].remove('trainable') #(batchsize, seqlen, wordDim)

    #print get_output_shape(emb)
    reshape = ReshapeLayer(emb, (batchsize, seqlen, num_feats*wordDim))
    #print get_output_shape(reshape)

    conv1d = Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()) #nOutputFrame = num_flters, 
                                            #nOutputFrameSize = (num_feats*wordDim-filter_size)/stride +1

    #print get_output_shape(conv1d)

    conv1d = DimshuffleLayer(conv1d, (0,2,1))

    #print get_output_shape(conv1d)

    pool_size=num_filters

    maxpool = MaxPool1DLayer(conv1d, pool_size=pool_size) 

    #print get_output_shape(maxpool)
  
    #forward = FlattenLayer(maxpool) 

    #print get_output_shape(forward)
 
    hid = DenseLayer(maxpool, num_units=args.hiddenDim, nonlinearity=sigmoid)

    network = DenseLayer(hid, num_units=2, nonlinearity=softmax)

    prediction = get_output(network)
    
    loss = T.mean(binary_crossentropy(prediction,target_var))
    lambda_val = 0.5 * 1e-4

    layers = {emb:lambda_val, conv1d:lambda_val, hid:lambda_val, network:lambda_val} 
    penalty = regularize_layer_params_weighted(layers, l2)
    loss = loss + penalty


    params = get_all_params(network, trainable=True)

    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        raise "Need set optimizer correctly"
 
    test_prediction = get_output(network, deterministic=True)
    test_loss = T.mean(binary_crossentropy(test_prediction,target_var))

    train_fn = theano.function([input_var, target_var], 
        loss, updates=updates, allow_input_downcast=True)

    test_acc = T.mean(binary_accuracy(test_prediction, target_var))
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc], allow_input_downcast=True)

    return train_fn, val_fn, network
Example #20
def distance_capped_binary_crossentropy(preds, targets, distance):
    loss = binary_crossentropy(preds, targets)
    mask = T.gt(T.abs_(preds - targets), distance)
    return loss * mask
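A small numeric sketch (reusing the function above together with its Theano/Lasagne imports) showing the effect: the per-element loss is zeroed wherever the prediction is already within distance of the target.

import numpy as np
import theano
import theano.tensor as T

p, t = T.vector('p'), T.vector('t')
capped = theano.function([p, t], distance_capped_binary_crossentropy(p, t, 0.2))

preds = np.array([0.9, 0.6], dtype=theano.config.floatX)
targs = np.array([1.0, 1.0], dtype=theano.config.floatX)
print(capped(preds, targs))  # first entry is zeroed: |0.9 - 1.0| <= 0.2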
Example #21
                              W=GlorotUniform(),
                              nonlinearity=rectify)
print(get_output_shape(lae_deconv7))
lae_unpool7 = Upscale2DLayer(lae_deconv7, (2, 2))
print(get_output_shape(lae_unpool7))

convae = Conv2DLayerFast(lae_unpool7,
                         1, (5, 5),
                         pad=(2, 2),
                         W=GlorotUniform(),
                         nonlinearity=sigmoid)
print(get_output_shape(convae))

print('[ConvAE] define loss, optimizer, and compile')
Yae_pred_ = get_output(convae)
loss_ = binary_crossentropy(Yae_pred_, Yae_)
loss_ = loss_.mean()

params_ = lasagne.layers.get_all_params(convae, trainable=True)
updates_ = rmsprop(loss_, params_, learning_rate=confae['lr'])
train_ae_fn = theano.function([Xae_, Yae_], loss_, updates=updates_)
pred_ae_fn = theano.function([Xae_], Yae_pred_)

##################

if confnet['is_aug']:
    ddatagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
Example #22
def get_options(batchsize, nepochs, plotevery, learningrate, normalizegrads,
                clipgrads, enabledebug, optimizer, yzeromean, yunitvar,
                noshuffle, nobatchnorm, remove5koutliers, coulombdim, datadir,
                outputdir):

    global batch_size
    batch_size = batchsize
    global epochs
    epochs = nepochs

    print("Changing pwd to {}".format(outputdir))
    os.chdir(outputdir)

    mydir = os.path.join(os.getcwd(),
                         datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    os.makedirs(mydir)
    os.chdir(mydir)

    app_name = sys.argv[0]
    global logger
    logger = get_logger(app_name=app_name, logfolder=mydir)

    # Load dataset
    w, h = coulombdim
    X, Y = load_data(datadir + os.sep + "coulomb.txt",
                     datadir + os.sep + "energies.txt",
                     w=w,
                     h=h)

    if remove5koutliers:
        from get_idxs_to_keep import get_idxs_to_keep
        idxs = get_idxs_to_keep(datadir + os.sep + "energies.txt")
        X = X[idxs, :]
        Y = Y[idxs, :]
        logger.info("REMOVING 5k outliers.")

    Y, Y_mean, Y_std, Y_binarized = preprocess_targets(Y,
                                                       zero_mean=yzeromean,
                                                       unit_var=yunitvar)
    [X_train, X_test], [Y_train,
                        Y_test], splits = get_data_splits(X,
                                                          Y,
                                                          splits=[90, 10])
    [Y_binarized_train, Y_binarized_test] = np.split(Y_binarized, splits)[:-1]

    np.savez('Y_vals.npz',
             Y_train=Y_train,
             Y_test=Y_test,
             Y_binarized_test=Y_binarized_test,
             Y_binarized_train=Y_binarized_train,
             Y_mean=Y_mean,
             Y_std=Y_std)
    np.savez('X_vals.npz', X_train=X_train, X_test=X_test)

    dataDim = X.shape[1:]
    assert dataDim == (
        w, h
    ), "The dimensions of data you have passed {} and the ones after loading datafile {} don't match !".format(
        (w, h), dataDim)
    outputDim = Y.shape[1]
    datapoints = len(X_train)
    print("datapoints = %d" % datapoints)

    # # making the datapoints shared variables
    # X_train           = make_shared(X_train)
    # X_test            = make_shared(X_test)
    # Y_train           = make_shared(Y_train)
    # Y_test            = make_shared(Y_test)
    # Y_binarized_train = make_shared(Y_binarized_train)
    # Y_binarized_test  = make_shared(Y_binarized_test)

    # TODO !!!!I am here
    # print("Train set size {}, Train set (labelled) size {}, Test set size {}," +
    #         "Validation set size {}".format(
    #             train_set[0].size,train_set_labeled[0].size,
    #             test_set[0].size, valid_set[0].size))

    eigen_value_count = outputDim

    # Defining the model now.
    th_coulomb = T.ftensor3()
    th_energies = T.fmatrix()
    th_energies_bin = T.fmatrix()
    th_learningrate = T.fscalar()

    l_input = InputLayer(shape=(None, dataDim[0], dataDim[1]),
                         input_var=th_coulomb,
                         name="Input")
    l_input = FlattenLayer(l_input, name="FlattenInput")
    l_pseudo_bin = DenseLayer(l_input,
                              num_units=2000,
                              nonlinearity=sigmoid,
                              name="PseudoBinarized")
    if not nobatchnorm:
        l_pseudo_bin = batch_norm(l_pseudo_bin)

    l_h1 = []
    l_h2 = []
    l_realOut = []
    l_binOut = []

    for branch_num in range(eigen_value_count):
        l_h1.append(
            DenseLayer(l_pseudo_bin,
                       num_units=1000,
                       nonlinearity=rectify,
                       name="hidden_1_%d" % branch_num))
        l_h2.append(
            DenseLayer(l_h1[-1],
                       num_units=400,
                       nonlinearity=rectify,
                       name="hidden_2_%d" % branch_num))
        l_realOut.append(
            DenseLayer(l_h2[-1],
                       num_units=1,
                       nonlinearity=linear,
                       name="realOut_%d" % branch_num))
        l_binOut.append(
            DenseLayer(l_h2[-1],
                       num_units=1,
                       nonlinearity=sigmoid,
                       name="binOut"))

    l_realOut_cat = ConcatLayer(l_realOut, name="real_concat")
    l_binOut_cat = ConcatLayer(l_binOut, name="bin_concat")
    l_output = ElemwiseMergeLayer([l_binOut_cat, l_realOut_cat],
                                  T.mul,
                                  name="final_output")

    energy_output = get_output(l_output, deterministic=False)
    binary_output = get_output(l_binOut_cat, deterministic=False)
    # get deterministic output for validation
    energy_output_det = get_output(l_output, deterministic=True)
    binary_output_det = get_output(l_binOut_cat, deterministic=True)

    loss_real = T.mean(abs(energy_output - th_energies))
    loss_binary = T.mean(binary_crossentropy(binary_output, th_energies_bin))
    loss = loss_real + loss_binary

    # get loss output for validation
    loss_real_det = T.mean(abs(energy_output_det - th_energies))
    loss_binary_det = T.mean(
        binary_crossentropy(binary_output_det, th_energies_bin))
    loss_det = loss_real_det + loss_binary_det

    params = get_all_params(l_output, trainable=True)
    grad = T.grad(loss, params)

    if normalizegrads is not None:
        grad = lasagne.updates.total_norm_constraint(grad,
                                                     max_norm=normalizegrads)

    if clipgrads is not None:
        grad = [T.clip(g, -clipgrads, clipgrads) for g in grad]

    optimization_algo = get_optimizer[optimizer]
    # updates = optimization_algo(grad, params, learning_rate=learningrate)
    updates = optimization_algo(grad, params, learning_rate=th_learningrate)

    train_fn = theano.function(
        [th_coulomb, th_energies, th_energies_bin, th_learningrate],
        [loss, energy_output],
        updates=updates,
        allow_input_downcast=True)
    get_grad = theano.function([th_coulomb, th_energies, th_energies_bin],
                               grad)
    # get_updates = theano.function([th_data, th_labl], [updates.values()])
    # val_fn    = theano.function([th_coulomb, th_energies, th_energies_bin], [loss, energy_output], updates=updates, allow_input_downcast=True)
    val_fn = theano.function([th_coulomb, th_energies, th_energies_bin],
                             [loss_det, energy_output_det],
                             allow_input_downcast=True)

    datapoints = len(X_train)
    print("datapoints = %d" % datapoints)

    with open(os.path.join(mydir, "data.txt"), "w") as f:
        script = app_name
        for elem in [
                "meta_seed", "dataDim", "batch_size", "epochs", "learningrate",
                "normalizegrads", "clipgrads", "enabledebug", "optimizer",
                "plotevery", "noshuffle", "nobatchnorm", "remove5koutliers",
                "coulombdim", "script", "datadir"
        ]:
            f.write("{} : {}\n".format(elem, eval(elem)))

    train_loss_lowest = np.inf
    test_loss_lowest = np.inf

    row_norms = np.linalg.norm(X_train, axis=-1)
    for epoch in range(epochs):
        batch_start = 0
        train_loss = []

        if learningrate == None:
            if epoch < 50:
                learning_rate = 0.0001
            elif epoch < 100:
                learning_rate = 0.00001
            elif epoch < 500:
                learning_rate = 0.000001
            else:
                learning_rate = 0.0000001
        else:
            learning_rate = eval(learningrate)
            if isinstance(learning_rate, float):
                pass
            elif isinstance(learning_rate, list):
                for epch, lrate in learning_rate:
                    # ensure that last epoch is float("inf")
                    if epoch <= epch:
                        learning_rate = lrate
                        break
            else:
                raise RuntimeError(
                    "Invalid learning rate.Either \n 1) Float or 2) List [[epch, lrate],...,[float('inf'), lrate]]"
                )
        logger.debug("learning rate {}".format(learning_rate))

        indices = np.random.permutation(datapoints)
        minibatches = int(datapoints / batch_size)
        if not noshuffle:
            logger.debug("Shuffling Started.")
            X_train = coulomb_shuffle(X_train, row_norms)
            logger.debug("Shuffling complete.")

        for minibatch in range(minibatches):
            train_idxs = indices[batch_start:batch_start + batch_size]
            X_train_batch = X_train[train_idxs, :]
            Yr_train_batch = Y_train[train_idxs, :]
            Yb_train_batch = Y_binarized_train[train_idxs, :]

            train_output = train_fn(X_train_batch, Yr_train_batch,
                                    Yb_train_batch, learning_rate)
            batch_start = batch_start + batch_size

            train_loss.append(train_output[0])

            if enabledebug:
                # Debugging information
                batchIdx = epoch * minibatches + minibatch
                fn = 'params_{:>010d}'.format(batchIdx)  # saving params
                param_values = get_all_param_values(l_output)
                param_norm = np.linalg.norm(
                    np.hstack([param.flatten() for param in param_values]))
                gradients = get_grad(X_train_batch, Yr_train_batch,
                                     Yb_train_batch)
                gradient_norm = np.linalg.norm(
                    np.hstack([gradient.flatten() for gradient in gradients]))
                logger.debug(
                    "Epoch : {:0>4}  minibatch {:0>3} Gradient Norm : {:>0.4}, Param Norm : {:>0.4} GradNorm/ParamNorm : {:>0.4} (Values from Prev. Minibatch) Train loss {}"
                    .format(epoch, minibatch, gradient_norm, param_norm,
                            gradient_norm / param_norm, train_loss[-1]))
                param_names = [
                    param.__str__() for param in get_all_params(l_output)
                ]
                np.savez(fn + '.npz', **dict(zip(param_names, param_values)))
                np.savez('Y_train_pred_{}.npz'.format(batchIdx),
                         Y_train_pred=train_output[1])
                if train_loss[-1] < train_loss_lowest:
                    train_loss_lowest = train_loss[-1]
                    np.savez('Y_train_pred_best.npz',
                             Y_train_pred=train_output[1])
                    logger.debug(
                        "Found the best training prediction (Y_train_pred_best) at %d epoch %d minibatch"
                        % (epoch, minibatch))
                if np.isnan(gradient_norm):
                    pdb.set_trace()

        if (epoch % plotevery == 0):
            logger.info("Epoch {} of {}".format(epoch, epochs))

            fn = 'params_{:>03d}'.format(epoch)  # saving params
            param_values = get_all_param_values(l_output)
            param_norm = np.linalg.norm(
                np.hstack([param.flatten() for param in param_values]))
            param_names = [
                param.__str__() for param in get_all_params(l_output)
            ]
            if not enabledebug:
                np.savez(fn + '.npz', **dict(zip(param_names, param_values)))
                np.savez('Y_train_pred_{}.npz'.format(epoch),
                         Y_train_pred=train_output[1])
                mean_train_loss = np.mean(train_loss)
                if mean_train_loss < train_loss_lowest:
                    train_loss_lowest = mean_train_loss
                    np.savez('Y_train_pred_best.npz',
                             Y_train_pred=train_output[1])
                    logger.info(
                        "Found the best training prediction (Y_train_pred_best) at %d epoch"
                        % epoch)

            gradients = get_grad(X_train_batch, Yr_train_batch, Yb_train_batch)
            gradient_norm = np.linalg.norm(
                np.hstack([gradient.flatten() for gradient in gradients]))
            logger.info(
                "  Gradient Norm : {:>0.4}, Param Norm : {:>0.4} GradNorm/ParamNorm : {:>0.4} "
                .format(gradient_norm, param_norm, gradient_norm / param_norm))
            logger.info("  Train loss {:>0.4}".format(np.mean(train_loss)))

            test_loss, test_prediction = val_fn(X_test, Y_test,
                                                Y_binarized_test)
            np.savez('Y_test_pred_{}.npz'.format(epoch),
                     Y_test_pred=test_prediction)
            logger.info("  Test loss {}".format(test_loss))
            if test_loss < test_loss_lowest:
                test_loss_lowest = test_loss
                np.savez('Y_test_pred_best.npz', Y_test_pred=test_prediction)
                logger.info(
                    "Found the best test prediction (Y_test_pred_best) at %d epoch"
                    % epoch)
Example #23
def load_weights():
    model_name = 'model_weights.npz'
    with np.load(model_name) as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    set_all_param_values([RoadSegment], param_values)


def save_weights():
    np.savez('model_weights.npz', *get_all_param_values([RoadSegment]))


road_segment = theano.function(inputs=[X],
                               outputs=frs,
                               allow_input_downcast=True)

loss = binary_crossentropy(rs, Y)
loss = loss.mean()

IoU = T.and_(T.ge(rs, 0.5),T.ge(Y,0.5)).sum(axis=(1,2)) * \
      T.minimum(T.inv(T.or_(T.ge(rs, 0.5),T.ge(Y,0.5)).sum(axis=(1,2))), 128.0 * 256.0)
IoU = IoU.mean()

test_loss = binary_crossentropy(frs, Y)
test_loss = test_loss.mean()

test_IoU = T.and_(T.ge(frs, 0.5),T.ge(Y,0.5)).sum(axis=(1,2)) * \
           T.minimum(T.inv(T.or_(T.ge(frs, 0.5),T.ge(Y,0.5)).sum(axis=(1,2))), 128.0 * 256.0)
test_IoU = test_IoU.mean()

loss_function = theano.function(inputs=[X, Y, P],
                                outputs=[loss, IoU],
Example #24
def saliency_map(input, output, pred, X):
    score = -binary_crossentropy(output[:, pred], np.array([1])).sum()
    return np.abs(T.grad(score, input).eval({input: X}))
Example #25
def event_span_classifier(args, input_var, input_mask_var, target_var, wordEmbeddings, seqlen):

    print("Building model with LSTM")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    GRAD_CLIP = wordDim

    args.lstmDim = 150

    input = InputLayer((None, seqlen),input_var=input_var)
    batchsize, seqlen = input.input_var.shape
    input_mask = InputLayer((None, seqlen),input_var=input_mask_var)
    
    emb = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    #emb.params[emb_1.W].remove('trainable')

    lstm = LSTMLayer(emb, num_units=args.lstmDim, mask_input=input_mask, grad_clipping=GRAD_CLIP,
        nonlinearity=tanh)

    lstm_back = LSTMLayer(
        emb, num_units=args.lstmDim, mask_input=input_mask, grad_clipping=GRAD_CLIP,
        nonlinearity=tanh, backwards=True)

    slice_forward = SliceLayer(lstm, indices=-1, axis=1) # out_shape (None, args.lstmDim)
    slice_backward = SliceLayer(lstm_back, indices=0, axis=1) # out_shape (None, args.lstmDim)

    concat = ConcatLayer([slice_forward, slice_backward])

    hid = DenseLayer(concat, num_units=args.hiddenDim, nonlinearity=sigmoid)

    network = DenseLayer(hid, num_units=2, nonlinearity=softmax)

    prediction = get_output(network)
    
    loss = T.mean(binary_crossentropy(prediction,target_var))
    lambda_val = 0.5 * 1e-4

    layers = {emb:lambda_val, lstm:lambda_val, hid:lambda_val, network:lambda_val} 
    penalty = regularize_layer_params_weighted(layers, l2)
    loss = loss + penalty


    params = get_all_params(network, trainable=True)

    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        raise "Need set optimizer correctly"
 
    test_prediction = get_output(network, deterministic=True)
    test_loss = T.mean(binary_crossentropy(test_prediction,target_var))


    train_fn = theano.function([input_var, input_mask_var,target_var], 
        loss, updates=updates, allow_input_downcast=True)

    test_acc = T.mean(binary_accuracy(test_prediction, target_var))

    val_fn = theano.function([input_var, input_mask_var, target_var], [test_loss, test_acc], allow_input_downcast=True)

    return train_fn, val_fn, network
Example #26
def event_span_classifier(args, input_var, target_var, wordEmbeddings, seqlen, num_feats):

    print("Building model with 1D Convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    kw = 2
    num_filters = seqlen-kw+1
    stride = 1 

    #important context words as channels
 
    #CNN_sentence config
    filter_size=wordDim
    pool_size=seqlen-filter_size+1

    input = InputLayer((None, seqlen, num_feats),input_var=input_var)
    batchsize, _, _ = input.input_var.shape
    emb = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    #emb.params[emb.W].remove('trainable') #(batchsize, seqlen, wordDim)

    #print get_output_shape(emb)
    reshape = ReshapeLayer(emb, (batchsize, seqlen, num_feats*wordDim))
    #print get_output_shape(reshape)

    conv1d = Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()) #nOutputFrame = num_flters, 
                                            #nOutputFrameSize = (num_feats*wordDim-filter_size)/stride +1

    #print get_output_shape(conv1d)

    conv1d = DimshuffleLayer(conv1d, (0,2,1))

    #print get_output_shape(conv1d)

    pool_size=num_filters

    maxpool = MaxPool1DLayer(conv1d, pool_size=pool_size) 

    #print get_output_shape(maxpool)
  
    #forward = FlattenLayer(maxpool) 

    #print get_output_shape(forward)
 
    hid = DenseLayer(maxpool, num_units=args.hiddenDim, nonlinearity=sigmoid)

    network = DenseLayer(hid, num_units=2, nonlinearity=softmax)

    prediction = get_output(network)
    
    loss = T.mean(binary_crossentropy(prediction,target_var))
    lambda_val = 0.5 * 1e-4

    layers = {emb:lambda_val, conv1d:lambda_val, hid:lambda_val, network:lambda_val} 
    penalty = regularize_layer_params_weighted(layers, l2)
    loss = loss + penalty


    params = get_all_params(network, trainable=True)

    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        raise "Need set optimizer correctly"
 
    test_prediction = get_output(network, deterministic=True)
    test_loss = T.mean(binary_crossentropy(test_prediction,target_var))

    train_fn = theano.function([input_var, target_var], 
        loss, updates=updates, allow_input_downcast=True)

    test_acc = T.mean(binary_accuracy(test_prediction, target_var))
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc], allow_input_downcast=True)

    return train_fn, val_fn, network
Example #27
def train(options):
    # -------- setup options and data ------------------
    np.random.seed(options['seed'])

    # Load options
    host = socket.gethostname() # get computer hostname
    start_time = datetime.datetime.now().strftime("%y-%m-%d-%H-%M")

    model = importlib.import_module(options['model_file'])

    # ---------- build model and compile ---------------
    input_batch = T.tensor4() # input image sequences
    target = T.tensor4() # target image

    print('Build model...')
    model = model.Model(**options['modelOptions'])

    print('Compile ...')
    net, outputs, filters = model.build_model(input_batch)

    # compute loss
    outputs = get_output(outputs + [filters])
    output_frames = outputs[:-1]
    output_filter = outputs[-1]

    train_losses = []
    for i in range(options['modelOptions']['target_seqlen']):
        output_frame = output_frames[i]

        if options['loss'] == 'squared_error':
            frame_loss = squared_error(output_frame, target[:, [i], :, :])
        elif options['loss'] == 'binary_crossentropy':
            # Clipping to avoid NaN's in binary crossentropy: https://github.com/Lasagne/Lasagne/issues/436
            output_frame = T.clip(output_frame, np.finfo(np.float32).eps, 1-np.finfo(np.float32).eps)
            frame_loss = binary_crossentropy(output_frame, target[:,[i],:,:])
        else:
            assert False

        train_losses.append(aggregate(frame_loss))

    train_loss = sum(train_losses) / options['modelOptions']['target_seqlen']

    # update
    sh_lr = theano.shared(lasagne.utils.floatX(options['learning_rate'])) # to allow dynamic learning rate

    layers = get_all_layers(net)
    all_params = get_all_params(layers, trainable = True)
    updates = adam(train_loss, all_params, learning_rate=sh_lr)
    _train = theano.function([input_batch, target], train_loss, updates=updates, allow_input_downcast=True)
    _test = theano.function([input_batch, target], [train_loss, output_filter] + output_frames, allow_input_downcast=True)

    # ------------ data setup ----------------
    print('Prepare data...')
    dataset = importlib.import_module(options['dataset_file'])
    dh = dataset.DataHandler(**options['datasetOptions'])

    # ------------ training setup ----------------
    if options['pretrained_model_path'] is not None:
        checkpoint = pickle.load(open(options['pretrained_model_path'], 'rb'))
        model_values = checkpoint['model_values'] # overwrite the values of model parameters
        lasagne.layers.set_all_param_values(layers, model_values)

        history_train = checkpoint['history_train']
        start_epoch = checkpoint['epoch'] + 1
        options['batch_size'] = checkpoint['options']['batch_size']
        sh_lr.set_value(floatX(checkpoint['options']['learning_rate']))
    else:
        start_epoch = 0
        history_train = []

    # ------------ actual training ----------------
    print('Start training ...')

    input_seqlen = options['modelOptions']['input_seqlen']
    for epoch in range(start_epoch, start_epoch + options['num_epochs']):
        epoch_start_time = time.time()

        history_batch = []
        for batch_index in range(0, options['batches_per_epoch']):

            batch = dh.GetBatch() # generate data on the fly
            if options['dataset_file'] == 'datasets.stereoCarsColor':
                batch_input = batch[..., :input_seqlen].squeeze(axis=4)  # first frames
                batch_target = batch[..., input_seqlen:].squeeze(axis=4)  # last frame
            else:
                batch_input = batch[..., :input_seqlen].transpose(0,4,2,3,1).squeeze(axis=4) # first frames
                batch_target = batch[..., input_seqlen:].transpose(0,4,2,3,1).squeeze(axis=4) # last frame

            # train
            loss_train = _train(batch_input, batch_target)
            history_batch.append(loss_train)

            print("Epoch {} of {}, batch {} of {}, took {:.3f}s".format(epoch + 1, options['num_epochs'], batch_index+1, options['batches_per_epoch'], time.time() - epoch_start_time))
            print("  training loss:\t{:.6f}".format(loss_train.item()))

        # clear the screen
        display.clear_output(wait=True)

        # print statistics
        history_train.append(np.mean(history_batch))
        history_batch = []
        print("Epoch {} of {}, took {:.3f}s".format(epoch + 1, options['num_epochs'], time.time() - epoch_start_time))
        print("  training loss:\t{:.6f}".format(history_train[epoch].item()))

        # set new learning rate (maybe this is unnecessary with adam updates)
        if (epoch+1) % options['decay_after'] == 0:
            options['learning_rate'] = sh_lr.get_value() * 0.5
            print "New LR:", options['learning_rate']
            sh_lr.set_value(floatX(options['learning_rate']))

        # save the model
        if (epoch+1) % options['save_after'] == 0:
            save_model(layers, epoch, history_train, start_time, host, options)
            print("Model saved")
Example #28
def saliency_map(input, output, pred, Xb):
    import theano.tensor as T
    from lasagne.objectives import binary_crossentropy
    score = -binary_crossentropy(output[:, pred], np.array([1])).sum()
    heat_map_ = np.abs(T.grad(score, input).eval({input: Xb}))
    return heat_map_
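An illustrative call of the function above. The tiny Lasagne network here is hypothetical, just enough to drive saliency_map; note that the function also relies on numpy being imported as np in its module.

import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne.layers import InputLayer, DenseLayer, get_output

X_sym = T.matrix('X')
l_in = InputLayer((None, 8), input_var=X_sym)
l_out = DenseLayer(l_in, num_units=2, nonlinearity=lasagne.nonlinearities.sigmoid)
output = get_output(l_out)

Xb = np.random.rand(1, 8).astype(theano.config.floatX)
heat = saliency_map(X_sym, output, pred=0, Xb=Xb)  # gradient magnitude w.r.t. the input
print(heat.shape)  # (1, 8)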
Example #29
    allow_incomplete=True,
    include_all=True,
    skip_probability=0.25,
    offset_probability=0,
    n_rectangular_segments=N_SEGMENTS,
    rectangular_kwargs={'format': 'changepoints [0,1]'}
)


net_dict = dict(
    save_plot_interval=SAVE_PLOT_INTERVAL,
#    loss_function=partial(ignore_inactive, loss_func=mdn_nll, seq_length=SEQ_LENGTH),
#    loss_function=lambda x, t: mdn_nll(x, t).mean(),
#    loss_function=lambda x, t: (mse(x, t) * MASK).mean(),
#    loss_function=lambda x, t: mse(x, t).mean(),
    loss_function=lambda x, t: binary_crossentropy(x, t).mean(),
#    loss_function=partial(scaled_cost, loss_func=mse),
#    loss_function=ignore_inactive,
#    loss_function=partial(scaled_cost3, ignore_inactive=False),
#    updates_func=momentum,
    updates_func=clipped_nesterov_momentum,
    updates_kwargs={'clip_range': (0, 10)},
    learning_rate=1e-2,
    learning_rate_changes_by_iteration={
        1000: 1e-3,
        5000: 1e-4
    },
    do_save_activations=True,
    auto_reshape=False,
#    plotter=CentralOutputPlotter
#    plotter=Plotter(n_seq_to_plot=32)
)
Ejemplo n.º 30
0
def load_weights():
    model_name = 'model/' + dataset[5:] + '_model.npz'
    with np.load(model_name) as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    set_all_param_values([FloorSegment, EdgeRegion], param_values)

def save_weights():
    np.savez('model_weights.npz', *get_all_param_values([RoadSegment]))

road_segment  = theano.function(inputs               = [X],
                                outputs              = frs,
                                allow_input_downcast = True)


loss = binary_crossentropy(rs,Y)
loss = loss.mean()

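# IoU per image on the thresholded (>= 0.5) masks: intersection count times 1/union,
# with 1/union capped (at what is presumably the 120x160 image resolution) so an empty
# union yields 0 instead of NaN.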
IoU = T.and_(T.ge(rs, 0.5),T.ge(Y,0.5)).sum(axis=(1,2)) * \
      T.minimum(T.inv(T.or_(T.ge(rs, 0.5),T.ge(Y,0.5)).sum(axis=(1,2))), 120.0 * 160.0)
IoU = IoU.mean()

loss_function = theano.function(inputs               = [X,Y,P],
                                outputs              = [loss,IoU],
                                allow_input_downcast = True)

params = get_all_params(RoadSegment, trainable=True)

updates = adam(loss,
               params,
               learning_rate = lr)
Ejemplo n.º 31
0
def saliency_map(input, output, pred, X):
    score = -binary_crossentropy(output[:, pred], np.array([1])).sum()
    return np.abs(T.grad(score, input).eval({input: X}))
Ejemplo n.º 32
0
def compile_update_softmax(nnet, inputs, targets):
    """
    create a softmax loss for network given in argument
    """

    floatX = Cfg.floatX
    C = Cfg.C

    final_layer = nnet.all_layers[-1]
    trainable_params = lasagne.layers.get_all_params(final_layer,
                                                     trainable=True)

    # Regularization
    if Cfg.weight_decay:
        l2_penalty = (floatX(0.5) / C) * get_l2_penalty(nnet)
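        # weight decay scaled by 1/(2C): larger C means a weaker penalty, as in the
        # usual SVM-style C parameterisation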
    else:
        l2_penalty = T.cast(0, dtype='float32')

    # Backpropagation
    prediction = lasagne.layers.get_output(final_layer,
                                           inputs=inputs,
                                           deterministic=False)

    if Cfg.ad_experiment:
        train_loss = T.mean(l_objectives.binary_crossentropy(
            prediction.flatten(), targets),
                            dtype='float32')
        train_acc = T.mean(l_objectives.binary_accuracy(
            prediction.flatten(), targets),
                           dtype='float32')
    else:
        train_loss = T.mean(l_objectives.categorical_crossentropy(
            prediction, targets),
                            dtype='float32')
        train_acc = T.mean(T.eq(T.argmax(prediction, axis=1), targets),
                           dtype='float32')

    train_obj = T.cast(train_loss + l2_penalty, dtype='float32')
    updates = get_updates(nnet,
                          train_obj,
                          trainable_params,
                          solver=nnet.solver)
    nnet.backprop = theano.function([inputs, targets], [train_obj, train_acc],
                                    updates=updates)

    # Forwardpropagation
    test_prediction = lasagne.layers.get_output(final_layer,
                                                inputs=inputs,
                                                deterministic=True)

    if Cfg.ad_experiment:
        test_loss = T.mean(l_objectives.binary_crossentropy(
            test_prediction.flatten(), targets),
                           dtype='float32')
        test_acc = T.mean(l_objectives.binary_accuracy(
            test_prediction.flatten(), targets),
                          dtype='float32')
    else:
        test_loss = T.mean(l_objectives.categorical_crossentropy(
            test_prediction, targets),
                           dtype='float32')
        test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), targets),
                          dtype='float32')

    test_obj = T.cast(test_loss + l2_penalty, dtype='float32')
    nnet.forward = theano.function(
        [inputs, targets],
        [test_obj, test_acc, test_prediction, l2_penalty, test_loss])
Ejemplo n.º 33
0
def distance_capped_binary_crossentropy(preds, targets, distance):
    loss = binary_crossentropy(preds, targets)
    mask = T.gt(T.abs_(preds - targets), distance)
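    # keep the loss only where the prediction is more than `distance` away from the
    # target; predictions already within that margin contribute zero loss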
    return loss * mask
Ejemplo n.º 34
0
    # target_stats={
    #     'mean': np.array([ 0.04066789,  0.01881946,
    #                        0.24639061,  0.17608672,  0.10273963],
    #                      dtype=np.float32),
    #     'std': np.array([ 0.11449792,  0.07338708,
    #                    0.26608968,  0.33463112,  0.21250485],
    #                  dtype=np.float32)}
)

N = 50
net_dict = dict(
    save_plot_interval=SAVE_PLOT_INTERVAL,
    #    loss_function=partial(ignore_inactive, loss_func=mdn_nll, seq_length=SEQ_LENGTH),
    #    loss_function=lambda x, t: mdn_nll(x, t).mean(),
    #    loss_function=lambda x, t: mse(x, t).mean(),
    loss_function=lambda x, t: binary_crossentropy(x, t).mean(),
    #    loss_function=partial(scaled_cost, loss_func=mse),
    #    loss_function=ignore_inactive,
    #    loss_function=partial(scaled_cost3, ignore_inactive=False),
    updates_func=momentum,
    learning_rate=1e-4,
    learning_rate_changes_by_iteration={
        # 200: 1e-2,
        # 400: 1e-3,
        # 800: 1e-4
        #        500: 1e-3
        #  4000: 1e-03,
        # 6000: 5e-06,
        # 7000: 1e-06
        # 2000: 5e-06
        # 3000: 1e-05
    },
)
Ejemplo n.º 35
0
def build_network_2dconv(args, input_var, target_var, wordEmbeddings, maxlen=60):

    print("Building model with 2D Convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    num_filters = 100
    stride = 1

    # CNN_sentence config
    filter_size = (3, wordDim)
    pool_size = (maxlen - 3 + 1, 1)
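    # a valid convolution over maxlen words with height-3 filters leaves maxlen-3+1
    # positions; pooling over all of them is max-over-time pooling, one value per filter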

    input = InputLayer((None, maxlen), input_var=input_var)
    batchsize, seqlen = input.input_var.shape
    emb = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    emb.params[emb.W].remove("trainable")  # (batchsize, maxlen, wordDim)

    reshape = ReshapeLayer(emb, (batchsize, 1, maxlen, wordDim))

    conv2d = Conv2DLayer(
        reshape,
        num_filters=num_filters,
        filter_size=(filter_size),
        stride=stride,
        nonlinearity=rectify,
        W=GlorotUniform(),
    )  # (None, 100, 34, 1)
    maxpool = MaxPool2DLayer(conv2d, pool_size=pool_size)  # (None, 100, 1, 1)

    forward = FlattenLayer(maxpool)  # (None, 100) #(None, 50400)

    hid = DenseLayer(forward, num_units=args.hiddenDim, nonlinearity=sigmoid)

    network = DenseLayer(hid, num_units=2, nonlinearity=softmax)

    prediction = get_output(network)

    loss = T.mean(binary_crossentropy(prediction, target_var))
    lambda_val = 0.5 * 1e-4

    layers = {conv2d: lambda_val, hid: lambda_val, network: lambda_val}
    penalty = regularize_layer_params_weighted(layers, l2)
    loss = loss + penalty

    params = get_all_params(network, trainable=True)

    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        raise "Need set optimizer correctly"

    test_prediction = get_output(network, deterministic=True)
    test_loss = T.mean(binary_crossentropy(test_prediction, target_var))

    train_fn = theano.function([input_var, target_var], loss, updates=updates, allow_input_downcast=True)

    test_acc = T.mean(binary_accuracy(test_prediction, target_var))
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc], allow_input_downcast=True)

    return train_fn, val_fn
Ejemplo n.º 36
0
def safe_binary_crossentropy(predictions, targets, eps=1e-4):
    # add eps for predictions that are smaller than eps
    predictions = predictions + T.le(predictions, eps) * eps
    # remove eps for predictions that are larger than 1 - eps
    predictions = predictions - T.ge(predictions, 1 - eps) * eps
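    # predictions are now nudged away from exactly 0 and 1, keeping the logs inside
    # binary_crossentropy finite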
    return binary_crossentropy(predictions, targets)
Ejemplo n.º 37
0
def run(get_model, model_name):
    train_stream = ServerDataStream(
        ('cases', 'image_features', 'image_targets', 'multiplier'),
        False,
        hwm=10)
    valid_stream = ServerDataStream(
        ('cases', 'image_features', 'image_targets', 'multiplier'),
        False,
        hwm=10,
        port=5558)

    input_var = tensor.tensor4('image_features')
    target_var = tensor.tensor4('image_targets')
    multiply_var = tensor.matrix('multiplier')
    multiply_var = T.addbroadcast(multiply_var, 1)

    test_prediction, prediction, params = get_model(input_var, target_var,
                                                    multiply_var)

    loss = binary_crossentropy(prediction, target_var).mean()

    loss.name = 'loss'

    valid_error = T.neq((test_prediction > 0.5) * 1., target_var).mean()
    valid_error.name = 'error'

    scale = Scale(0.1)
    algorithm = GradientDescent(
        cost=loss,
        parameters=params,
        step_rule=scale,
        #step_rule=Adam(),
        on_unused_sources='ignore')

    host_plot = 'http://localhost:5006'

    extensions = [
        Timing(),
        TrainingDataMonitoring([loss], after_epoch=True),
        DataStreamMonitoring(variables=[loss, valid_error],
                             data_stream=valid_stream,
                             prefix="valid"),
        Plot('%s %s %s' %
             (model_name, datetime.date.today(), time.strftime('%H:%M')),
             channels=[['loss', 'valid_loss'], ['valid_error']],
             after_epoch=True,
             server_url=host_plot),
        Printing(),
        # Checkpoint('train'),
        FinishAfter(after_n_epochs=10)
    ]

    main_loop = MainLoop(data_stream=train_stream,
                         algorithm=algorithm,
                         extensions=extensions)
    cg = ComputationGraph(test_prediction)
    while True:
        main_loop.run()
        scale.learning_rate.set_value(
            numpy.float32(scale.learning_rate.get_value() * 0.7))
        numpy.savez('best_weights.npz',
                    [param.get_value() for param in cg.shared_variables])
Ejemplo n.º 38
0
def multi_task_classifier(args,
                          input_var,
                          target_var,
                          wordEmbeddings,
                          seqlen,
                          num_feats,
                          lambda_val=0.5 * 1e-4):

    print("Building multi task model with 1D Convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    kw = 2
    num_filters = seqlen - kw + 1
    stride = 1
    filter_size = wordDim
    pool_size = num_filters

    input = InputLayer((None, seqlen, num_feats), input_var=input_var)
    batchsize, _, _ = input.input_var.shape

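    # each task head below is an independent embedding -> Conv1D -> max-pool -> dense ->
    # softmax stack, trained with its own loss, adagrad updates and train/val functions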
    #span
    emb1 = EmbeddingLayer(input,
                          input_size=vocab_size,
                          output_size=wordDim,
                          W=wordEmbeddings.T)
    reshape1 = ReshapeLayer(emb1, (batchsize, seqlen, num_feats * wordDim))
    conv1d_1 = DimshuffleLayer(
        Conv1DLayer(reshape1,
                    num_filters=num_filters,
                    filter_size=wordDim,
                    stride=1,
                    nonlinearity=tanh,
                    W=GlorotUniform()), (0, 2, 1))
    maxpool_1 = MaxPool1DLayer(conv1d_1, pool_size=pool_size)
    hid_1 = DenseLayer(maxpool_1,
                       num_units=args.hiddenDim,
                       nonlinearity=sigmoid)
    network_1 = DenseLayer(hid_1, num_units=2, nonlinearity=softmax)
    """
    #DocTimeRel
    emb2 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    reshape2 = ReshapeLayer(emb2, (batchsize, seqlen, num_feats*wordDim))
    conv1d_2 = DimshuffleLayer(Conv1DLayer(reshape2, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_2 = MaxPool1DLayer(conv1d_2, pool_size=pool_size)  
    hid_2 = DenseLayer(maxpool_2, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_2 = DenseLayer(hid_2, num_units=5, nonlinearity=softmax)
    """

    #Type
    emb3 = EmbeddingLayer(input,
                          input_size=vocab_size,
                          output_size=wordDim,
                          W=wordEmbeddings.T)
    reshape3 = ReshapeLayer(emb3, (batchsize, seqlen, num_feats * wordDim))
    conv1d_3 = DimshuffleLayer(
        Conv1DLayer(reshape3,
                    num_filters=num_filters,
                    filter_size=wordDim,
                    stride=1,
                    nonlinearity=tanh,
                    W=GlorotUniform()), (0, 2, 1))
    maxpool_3 = MaxPool1DLayer(conv1d_3, pool_size=pool_size)
    hid_3 = DenseLayer(maxpool_3,
                       num_units=args.hiddenDim,
                       nonlinearity=sigmoid)
    network_3 = DenseLayer(hid_3, num_units=4, nonlinearity=softmax)

    #Degree
    emb4 = EmbeddingLayer(input,
                          input_size=vocab_size,
                          output_size=wordDim,
                          W=wordEmbeddings.T)
    reshape4 = ReshapeLayer(emb4, (batchsize, seqlen, num_feats * wordDim))
    conv1d_4 = DimshuffleLayer(
        Conv1DLayer(reshape4,
                    num_filters=num_filters,
                    filter_size=wordDim,
                    stride=1,
                    nonlinearity=tanh,
                    W=GlorotUniform()), (0, 2, 1))
    maxpool_4 = MaxPool1DLayer(conv1d_4, pool_size=pool_size)
    hid_4 = DenseLayer(maxpool_4,
                       num_units=args.hiddenDim,
                       nonlinearity=sigmoid)
    network_4 = DenseLayer(hid_4, num_units=4, nonlinearity=softmax)

    #Polarity
    emb5 = EmbeddingLayer(input,
                          input_size=vocab_size,
                          output_size=wordDim,
                          W=wordEmbeddings.T)
    reshape5 = ReshapeLayer(emb5, (batchsize, seqlen, num_feats * wordDim))
    conv1d_5 = DimshuffleLayer(
        Conv1DLayer(reshape5,
                    num_filters=num_filters,
                    filter_size=wordDim,
                    stride=1,
                    nonlinearity=tanh,
                    W=GlorotUniform()), (0, 2, 1))
    maxpool_5 = MaxPool1DLayer(conv1d_5, pool_size=pool_size)
    hid_5 = DenseLayer(maxpool_5,
                       num_units=args.hiddenDim,
                       nonlinearity=sigmoid)
    network_5 = DenseLayer(hid_5, num_units=3, nonlinearity=softmax)

    #ContextualModality
    emb6 = EmbeddingLayer(input,
                          input_size=vocab_size,
                          output_size=wordDim,
                          W=wordEmbeddings.T)
    reshape6 = ReshapeLayer(emb6, (batchsize, seqlen, num_feats * wordDim))
    conv1d_6 = DimshuffleLayer(
        Conv1DLayer(reshape6,
                    num_filters=num_filters,
                    filter_size=wordDim,
                    stride=1,
                    nonlinearity=tanh,
                    W=GlorotUniform()), (0, 2, 1))
    maxpool_6 = MaxPool1DLayer(conv1d_6, pool_size=pool_size)
    hid_6 = DenseLayer(maxpool_6,
                       num_units=args.hiddenDim,
                       nonlinearity=sigmoid)
    network_6 = DenseLayer(hid_6, num_units=5, nonlinearity=softmax)
    """
    #ContextualAspect
    emb7 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    reshape7 = ReshapeLayer(emb7, (batchsize, seqlen, num_feats*wordDim))
    conv1d_7 = DimshuffleLayer(Conv1DLayer(reshape7, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_7 = MaxPool1DLayer(conv1d_7, pool_size=pool_size)  
    hid_7 = DenseLayer(maxpool_7, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_7 = DenseLayer(hid_7, num_units=4, nonlinearity=softmax)
    """
    """
    #Permanence
    emb8 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    reshape8 = ReshapeLayer(emb8, (batchsize, seqlen, num_feats*wordDim))
    conv1d_8 = DimshuffleLayer(Conv1DLayer(reshape8, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_8 = MaxPool1DLayer(conv1d_8, pool_size=pool_size)  
    hid_8 = DenseLayer(maxpool_8, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_8 = DenseLayer(hid_8, num_units=4, nonlinearity=softmax)
    """

    # Is this important?
    """
    network_1_out, network_2_out, network_3_out, network_4_out, \
    network_5_out, network_6_out, network_7_out, network_8_out = \
    get_output([network_1, network_2, network_3, network_4, network_5, network_6, network_7, network_8])
    """
    network_1_out = get_output(network_1)
    network_3_out = get_output(network_3)
    network_4_out = get_output(network_4)
    network_5_out = get_output(network_5)
    network_6_out = get_output(network_6)

    loss_1 = T.mean(binary_crossentropy(
        network_1_out, target_var)) + regularize_layer_params_weighted(
            {
                emb1: lambda_val,
                conv1d_1: lambda_val,
                hid_1: lambda_val,
                network_1: lambda_val
            }, l2)
    updates_1 = adagrad(loss_1,
                        get_all_params(network_1, trainable=True),
                        learning_rate=args.step)
    train_fn_1 = theano.function([input_var, target_var],
                                 loss_1,
                                 updates=updates_1,
                                 allow_input_downcast=True)
    val_acc_1 = T.mean(
        binary_accuracy(get_output(network_1, deterministic=True), target_var))
    val_fn_1 = theano.function([input_var, target_var],
                               val_acc_1,
                               allow_input_downcast=True)
    """
    loss_2 = T.mean(categorical_crossentropy(network_2_out,target_var)) + regularize_layer_params_weighted({emb2:lambda_val, conv1d_2:lambda_val, 
                hid_2:lambda_val, network_2:lambda_val} , l2)
    updates_2 = adagrad(loss_2, get_all_params(network_2, trainable=True), learning_rate=args.step)
    train_fn_2 = theano.function([input_var, target_var], loss_2, updates=updates_2, allow_input_downcast=True)
    val_acc_2 =  T.mean(categorical_accuracy(get_output(network_2, deterministic=True), target_var))
    val_fn_2 = theano.function([input_var, target_var], val_acc_2, allow_input_downcast=True)
    """

    loss_3 = T.mean(categorical_crossentropy(
        network_3_out, target_var)) + regularize_layer_params_weighted(
            {
                emb3: lambda_val,
                conv1d_3: lambda_val,
                hid_3: lambda_val,
                network_3: lambda_val
            }, l2)
    updates_3 = adagrad(loss_3,
                        get_all_params(network_3, trainable=True),
                        learning_rate=args.step)
    train_fn_3 = theano.function([input_var, target_var],
                                 loss_3,
                                 updates=updates_3,
                                 allow_input_downcast=True)
    val_acc_3 = T.mean(
        categorical_accuracy(get_output(network_3, deterministic=True),
                             target_var))
    val_fn_3 = theano.function([input_var, target_var],
                               val_acc_3,
                               allow_input_downcast=True)

    loss_4 = T.mean(categorical_crossentropy(
        network_4_out, target_var)) + regularize_layer_params_weighted(
            {
                emb4: lambda_val,
                conv1d_4: lambda_val,
                hid_4: lambda_val,
                network_4: lambda_val
            }, l2)
    updates_4 = adagrad(loss_4,
                        get_all_params(network_4, trainable=True),
                        learning_rate=args.step)
    train_fn_4 = theano.function([input_var, target_var],
                                 loss_4,
                                 updates=updates_4,
                                 allow_input_downcast=True)
    val_acc_4 = T.mean(
        categorical_accuracy(get_output(network_4, deterministic=True),
                             target_var))
    val_fn_4 = theano.function([input_var, target_var],
                               val_acc_4,
                               allow_input_downcast=True)

    loss_5 = T.mean(categorical_crossentropy(
        network_5_out, target_var)) + regularize_layer_params_weighted(
            {
                emb5: lambda_val,
                conv1d_5: lambda_val,
                hid_5: lambda_val,
                network_5: lambda_val
            }, l2)
    updates_5 = adagrad(loss_5,
                        get_all_params(network_5, trainable=True),
                        learning_rate=args.step)
    train_fn_5 = theano.function([input_var, target_var],
                                 loss_5,
                                 updates=updates_5,
                                 allow_input_downcast=True)
    val_acc_5 = T.mean(
        categorical_accuracy(get_output(network_5, deterministic=True),
                             target_var))
    val_fn_5 = theano.function([input_var, target_var],
                               val_acc_5,
                               allow_input_downcast=True)

    loss_6 = T.mean(categorical_crossentropy(
        network_6_out, target_var)) + regularize_layer_params_weighted(
            {
                emb6: lambda_val,
                conv1d_6: lambda_val,
                hid_6: lambda_val,
                network_6: lambda_val
            }, l2)
    updates_6 = adagrad(loss_6,
                        get_all_params(network_6, trainable=True),
                        learning_rate=args.step)
    train_fn_6 = theano.function([input_var, target_var],
                                 loss_6,
                                 updates=updates_6,
                                 allow_input_downcast=True)
    val_acc_6 = T.mean(
        categorical_accuracy(get_output(network_6, deterministic=True),
                             target_var))
    val_fn_6 = theano.function([input_var, target_var],
                               val_acc_6,
                               allow_input_downcast=True)
    """
    loss_7 = T.mean(categorical_crossentropy(network_7_out,target_var)) + regularize_layer_params_weighted({emb7:lambda_val, conv1d_7:lambda_val, 
                hid_7:lambda_val, network_7:lambda_val} , l2)
    updates_7 = adagrad(loss_7, get_all_params(network_7, trainable=True), learning_rate=args.step)
    train_fn_7 = theano.function([input_var, target_var], loss_7, updates=updates_7, allow_input_downcast=True)
    val_acc_7 =  T.mean(categorical_accuracy(get_output(network_7, deterministic=True), target_var))
    val_fn_7 = theano.function([input_var, target_var], val_acc_7, allow_input_downcast=True)

    loss_8 = T.mean(categorical_crossentropy(network_8_out,target_var)) + regularize_layer_params_weighted({emb8:lambda_val, conv1d_8:lambda_val, 
                hid_8:lambda_val, network_8:lambda_val} , l2)
    updates_8 = adagrad(loss_8, get_all_params(network_8, trainable=True), learning_rate=args.step)
    train_fn_8 = theano.function([input_var, target_var], loss_8, updates=updates_8, allow_input_downcast=True)
    val_acc_8 =  T.mean(categorical_accuracy(get_output(network_8, deterministic=True), target_var))
    val_fn_8 = theano.function([input_var, target_var], val_acc_8, allow_input_downcast=True)
    """
    """
    return train_fn_1, val_fn_1, network_1, train_fn_2, val_fn_2, network_2, train_fn_3, val_fn_3, \
            network_3, train_fn_4, val_fn_4, network_4, train_fn_5, val_fn_5, network_5, \
            train_fn_6, val_fn_6, network_6, train_fn_7, val_fn_7, network_7, train_fn_8, val_fn_8, network_8
    """
    return train_fn_1, val_fn_1, network_1, train_fn_3, val_fn_3, \
            network_3, train_fn_4, val_fn_4, network_4, train_fn_5, val_fn_5, network_5, \
            train_fn_6, val_fn_6, network_6
Ejemplo n.º 39
0
## Discriminator(D)
D_layers, layer_X, layer_Y = bin_mnist.makeDiscriminator(
    MINIBATCH_SIZE, X, (MINIBATCH_SIZE, nc, npx, npx), Y, NUM_Y)
# D output for Real Data
p_real = ll.get_output(D_layers, inputs={layer_X: X})
# D output for Generated Data
p_gen = ll.get_output(D_layers, inputs={layer_X: gX})

print('getDisParams:')
discrim_params, discrim_sp_params = bin_mnist.getDisParams()

## Costs

# Cost function of D for real data = average of BCE(binary cross entropy)
d_cost_real = lo.binary_crossentropy(p_real, T.ones(p_real.shape)).mean()

# Cost function of D for gen  data = average of BCE
d_cost_gen = lo.binary_crossentropy(p_gen, T.zeros(p_gen.shape)).mean()

# Cost function of G = average of BCE
g_cost_d = lo.binary_crossentropy(p_gen, T.ones(p_gen.shape)).mean()
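# BCE against an all-ones target is the non-saturating generator loss -log(D(G(z))),
# which gives stronger gradients early in training than log(1 - D(G(z)))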

# total cost of D
d_cost = d_cost_real + d_cost_gen

# total cost of G
g_cost = g_cost_d

# total costs
cost = [g_cost, d_cost, g_cost_d, d_cost_real, d_cost_gen]
Ejemplo n.º 40
0
def safe_binary_crossentropy(predictions, targets, eps=1e-4):
    # add eps for predictions that are smaller than eps
    predictions = predictions + T.le(predictions, eps) * eps
    # remove eps for predictions that are larger than 1 - eps
    predictions = predictions - T.ge(predictions, 1 - eps) * eps
    return binary_crossentropy(predictions, targets)
Ejemplo n.º 41
0

#disc.append(ll.dropout(ll.DenseLayer(disc[-1],num_units=1000),p=0.3))
#disc.append(ll.dropout(ll.DenseLayer(disc[-1],num_units=500),p=0.2))
#disc.append(ll.dropout(ll.DenseLayer(disc[-1],num_units=250),p=0.3))
#disc.append(ll.GaussianNoiseLayer(disc[-1], sigma=0.01))
disc.append(ll.dropout(ll.DenseLayer(disc[-1], num_units=512,nonlinearity=nonlin.very_leaky_rectify),p=0.0))


disc.append(ll.DenseLayer(disc[-1],num_units=1,nonlinearity=nonlin.sigmoid))
disc_data=ll.get_output(disc[-1],inputs=Xvar)
disc_gen=ll.get_output(disc[-1],gen_out)
disc_params=ll.get_all_params(disc)

#data_obj=T.mean(T.log(disc_data)) #objective function for data
data_obj=lo.binary_crossentropy(disc_data,T.ones(batch_size)).mean()
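# minimising BCE against ones is equivalent to maximising the commented-out
# T.mean(T.log(disc_data)) objective for the discriminator on real data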



data_train=theano.function(
    inputs=[Xvar],
    outputs=data_obj,
    updates=lu.adam(data_obj,disc_params,learning_rate=lr),
        allow_input_downcast=True
    )

#gen_obj = T.mean(T.log(T.ones(batch_size) - disc_gen )  )
gen_obj=lo.binary_crossentropy(disc_gen,T.ones(batch_size)).mean()
b=theano.function(inputs=[noisevar],outputs=disc_gen,
        allow_input_downcast=True)
Ejemplo n.º 42
0
def loss(x, t):
    return aggregate(binary_crossentropy(x, t))
Ejemplo n.º 43
0
    def __init__(self,
                 load_weights=True,
                 is_training=True,
                 model_name='dronet_weights.npz'):

        self.model_name = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), model_name)

        def network(image):
            input_image = InputLayer(input_var=image,
                                     shape=(None, 1, 120, 160))

            conv1 = Conv2DLayer(input_image,
                                num_filters=32,
                                filter_size=(5, 5),
                                stride=(2, 2),
                                nonlinearity=rectify,
                                pad='same')

            pool1 = MaxPool2DLayer(conv1,
                                   pool_size=(3, 3),
                                   stride=(2, 2),
                                   pad=1)

            conv2 = batch_norm(
                Conv2DLayer(pool1,
                            num_filters=32,
                            filter_size=(3, 3),
                            stride=(2, 2),
                            nonlinearity=rectify,
                            pad='same'))

            conv2 = batch_norm(
                Conv2DLayer(conv2,
                            num_filters=32,
                            filter_size=(3, 3),
                            stride=(1, 1),
                            nonlinearity=rectify,
                            pad='same'))

            downsample1 = Conv2DLayer(pool1,
                                      num_filters=32,
                                      filter_size=(1, 1),
                                      stride=(2, 2),
                                      nonlinearity=rectify,
                                      pad='same')

            input3 = ElemwiseSumLayer([downsample1, conv2])
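            # ResNet-style shortcut: a strided 1x1 convolution matches the shape of the
            # pooled input so it can be summed element-wise with the conv branch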

            conv3 = batch_norm(
                Conv2DLayer(input3,
                            num_filters=64,
                            filter_size=(3, 3),
                            stride=(2, 2),
                            nonlinearity=rectify,
                            pad='same'))

            conv3 = batch_norm(
                Conv2DLayer(conv3,
                            num_filters=64,
                            filter_size=(3, 3),
                            stride=(1, 1),
                            nonlinearity=rectify,
                            pad='same'))

            downsample2 = Conv2DLayer(input3,
                                      num_filters=64,
                                      filter_size=(1, 1),
                                      stride=(2, 2),
                                      nonlinearity=rectify,
                                      pad='same')

            input4 = ElemwiseSumLayer([downsample2, conv3])

            conv4 = batch_norm(
                Conv2DLayer(input4,
                            num_filters=128,
                            filter_size=(3, 3),
                            stride=(2, 2),
                            nonlinearity=rectify,
                            pad='same'))

            conv4 = batch_norm(
                Conv2DLayer(conv4,
                            num_filters=128,
                            filter_size=(3, 3),
                            stride=(1, 1),
                            nonlinearity=rectify,
                            pad='same'))

            downsample3 = Conv2DLayer(input4,
                                      num_filters=128,
                                      filter_size=(1, 1),
                                      stride=(2, 2),
                                      nonlinearity=rectify,
                                      pad='same')

            input5 = ElemwiseSumLayer([downsample3, conv4])

            flatten = DropoutLayer(FlattenLayer(input5), 0.5)

            prob_out = DenseLayer(flatten, num_units=1, nonlinearity=sigmoid)

            turn_angle = DenseLayer(flatten, num_units=1, nonlinearity=tanh)

            return prob_out, turn_angle

        # declare the variables used in the network
        self.X = T.ftensor4()
        self.Y = T.fmatrix()
        self.Z = T.fmatrix()

        # Lasagne object for the network
        self.CollisionProbability, self.TurnAngle = network(self.X)

        if is_training:
            # collision probability for training
            # and testing. Output is a theano object
            self.collision_prob = get_output(self.CollisionProbability)
            self.collision_prob_test = get_output(self.CollisionProbability,
                                                  deterministic=True)

            # turn angle for training and testing.
            # Output is a theano object.
            self.turn_angle = get_output(self.TurnAngle)
            self.turn_angle_test = get_output(self.TurnAngle,
                                              deterministic=True)

            # Loss for the network.
            self.collision_loss = binary_crossentropy(self.collision_prob,
                                                      self.Y).mean()
            self.turn_loss = squared_error(self.turn_angle, self.Z).mean()
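            # the two heads are optimised independently: BCE for the collision head,
            # squared error for the steering head, each with its own Adam update below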

            # Loss to call for testing and validation.
            self.test_collision_loss = binary_crossentropy(
                self.collision_prob_test, self.Y).mean()
            self.test_turn_loss = squared_error(self.turn_angle_test,
                                                self.Z).mean()

            # network parameters for training.
            self.collision_params = get_all_params(self.CollisionProbability,
                                                   trainable=True)
            self.turn_params = get_all_params(self.TurnAngle, trainable=True)

            # network updates
            self.collision_updates = adam(self.collision_loss,
                                          self.collision_params,
                                          learning_rate=0.001)

            self.turn_updates = adam(self.turn_loss,
                                     self.turn_params,
                                     learning_rate=0.00005)

            # get test loss
            self.test_collision = theano.function(
                inputs=[self.X, self.Y],
                outputs=self.test_collision_loss,
                allow_input_downcast=True)

            self.test_turn = theano.function(inputs=[self.X, self.Z],
                                             outputs=self.test_turn_loss,
                                             allow_input_downcast=True)

            # training functions
            self.train_collision = theano.function(
                inputs=[self.X, self.Y],
                outputs=self.collision_loss,
                updates=self.collision_updates,
                allow_input_downcast=True)

            self.train_turn = theano.function(inputs=[self.X, self.Z],
                                              outputs=self.turn_loss,
                                              updates=self.turn_updates,
                                              allow_input_downcast=True)

        else:
            # collision probability for
            # testing. Output is a theano object
            self.collision_prob_test = get_output(self.CollisionProbability,
                                                  deterministic=True)

            # turn angle for testing.
            # Output is a theano object.
            self.turn_angle_test = get_output(self.TurnAngle,
                                              deterministic=True)

        # run the network to calculate collision probability
        # and turn angle given an input.
        self.dronet = theano.function(
            inputs=[self.X],
            outputs=[self.turn_angle_test, self.collision_prob_test],
            allow_input_downcast=True)

        def load():
            with np.load(self.model_name) as f:
                param_values = [f['arr_%d' % i] for i in range(len(f.files))]
            set_all_param_values([self.CollisionProbability, self.TurnAngle],
                                 param_values)

        if load_weights:
            load()
Ejemplo n.º 44
0
def get_options(batchsize, nepochs, plotevery, learningrate, normalizegrads,
                clipgrads, enabledebug, optimizer, yzeromean, yunitvar,
                datadir, outputdir):

    global batch_size
    batch_size = batchsize
    global epochs
    epochs = nepochs

    print("Changing pwd to {}".format(outputdir))
    os.chdir(outputdir)

    mydir = os.path.join(os.getcwd(),
                         datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    os.makedirs(mydir)
    os.chdir(mydir)

    app_name = sys.argv[0]
    global logger
    logger = get_logger(app_name=app_name, logfolder=mydir)

    # Load dataset
    X, Y = load_data(datadir + os.sep + "coulomb.txt",
                     datadir + os.sep + "spectra_-30_0_300.txt")

    pdb.set_trace()
    Y, Y_mean, Y_std, Y_binarized = preprocess_targets(Y,
                                                       zero_mean=yzeromean,
                                                       unit_var=yunitvar)
    [X_train, X_test], [Y_train,
                        Y_test], splits = get_data_splits(X,
                                                          Y,
                                                          splits=[90, 10])
    [Y_binarized_train, Y_binarized_test] = np.split(Y_binarized, splits)[:-1]

    np.savez('Y_vals.npz',
             Y_train=Y_train,
             Y_test=Y_test,
             Y_binarized_test=Y_binarized_test,
             Y_binarized_train=Y_binarized_train,
             Y_mean=Y_mean,
             Y_std=Y_std)
    np.savez('X_vals.npz', X_train=X_train, X_test=X_test)

    dataDim = X.shape[1:]
    outputDim = Y.shape[1]
    datapoints = len(X_train)
    print("datapoints = %d" % datapoints)

    # making the datapoints shared variables
    X_train = make_shared(X_train)
    X_test = make_shared(X_test)
    Y_train = make_shared(Y_train)
    Y_test = make_shared(Y_test)
    Y_binarized_train = make_shared(Y_binarized_train)
    Y_binarized_test = make_shared(Y_binarized_test)

    # TODO !!!!I am here
    # print("Train set size {}, Train set (labelled) size {}, Test set size {}," +
    #         "Validation set size {}".format(
    #             train_set[0].size,train_set_labeled[0].size,
    #             test_set[0].size, valid_set[0].size))

    # Defining the model now.
    th_coulomb = T.ftensor4()
    th_energies = T.fmatrix()
    th_energies_bin = T.fmatrix()
    indices = T.ivector()

    l_input = InputLayer(shape=(None, 1, 29, 29),
                         input_var=th_coulomb,
                         name="Input")
    l_conv1 = Conv2DLayer(l_input, 5, 3, pad="same", name="conv1")
    l_conv2 = Conv2DLayer(l_conv1, 5, 3, pad="same", name="conv2")
    l_maxpool1 = MaxPool2DLayer(l_conv2, (2, 2), name="maxpool1")
    l_conv3 = Conv2DLayer(l_maxpool1, 5, 2, pad="same", name="conv3")
    l_maxpool2 = MaxPool2DLayer(l_conv3, (2, 2), name="maxpool2")
    l_conv4 = Conv2DLayer(l_maxpool2, 5, 2, pad="same", name="conv4")
    l_flatten = FlattenLayer(l_conv4, name="flatten")
    l_realOut = DenseLayer(l_flatten,
                           num_units=outputDim,
                           nonlinearity=linear,
                           name="realOut")
    l_binOut = DenseLayer(l_flatten,
                          num_units=outputDim,
                          nonlinearity=sigmoid,
                          name="binOut")
    l_output = ElemwiseMergeLayer([l_binOut, l_realOut], T.mul)

    energy_output = get_output(l_output)
    binary_output = get_output(l_binOut)

    # loss_real   = T.sum(abs(energy_output - th_energies))
    loss_real = T.mean((energy_output - th_energies)**2)
    loss_binary = T.sum(binary_crossentropy(binary_output, th_energies_bin))
    loss = loss_real + loss_binary
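    # joint objective: MSE on the gated (binary mask x real value) energy outputs plus
    # summed BCE on the binary gate itself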

    params = get_all_params(l_output)
    grad = T.grad(loss, params)

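    # total_norm_constraint rescales the whole gradient list so its global norm is at
    # most `normalizegrads`; the element-wise clip then bounds each entry to
    # [-clipgrads, clipgrads]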
    if normalizegrads is not None:
        grad = lasagne.updates.total_norm_constraint(grad,
                                                     max_norm=normalizegrads)

    if clipgrads is not None:
        grad = [T.clip(g, -clipgrads, clipgrads) for g in grad]

    optimization_algo = get_optimizer[optimizer]
    updates = optimization_algo(grad, params, learning_rate=learningrate)

    # train_fn  = theano.function([th_coulomb, th_energies, th_energies_bin], [loss, energy_output], updates=updates, allow_input_downcast=True)
    train_fn = theano.function(
        [indices], [loss, energy_output],
        updates=updates,
        allow_input_downcast=True,
        givens={
            th_coulomb: X_train[indices, :],
            th_energies: Y_train[indices, :],
            th_energies_bin: Y_binarized_train[indices, :]
        })
    # get_grad  = theano.function([th_coulomb, th_energies, th_energies_bin], grad)
    get_grad = theano.function(
        [indices],
        grad,
        allow_input_downcast=True,
        givens={
            th_coulomb: X_train[indices, :],
            th_energies: Y_train[indices, :],
            th_energies_bin: Y_binarized_train[indices, :]
        })

    get_convOutput = theano.function(
        [indices],
        [get_output(l_conv1), get_output(l_conv2)],
        allow_input_downcast=True,
        givens={
            th_coulomb: X_train[indices, :],
            th_energies: Y_train[indices, :],
            th_energies_bin: Y_binarized_train[indices, :]
        })

    # get_updates = theano.function([th_data, th_labl], [updates.values()])
    val_fn = theano.function([], [loss, energy_output],
                             updates=updates,
                             allow_input_downcast=True,
                             givens={
                                 th_coulomb: X_test,
                                 th_energies: Y_test,
                                 th_energies_bin: Y_binarized_test
                             })

    with open(os.path.join(mydir, "data.txt"), "w") as f:
        script = app_name
        for elem in [
                "meta_seed", "dataDim", "batch_size", "epochs", "learningrate",
                "normalizegrads", "clipgrads", "enabledebug", "optimizer",
                "script"
        ]:
            f.write("{} : {}\n".format(elem, eval(elem)))

    train_loss_lowest = np.inf
    test_loss_lowest = np.inf

    for epoch in range(epochs):
        batch_start = 0
        train_loss = []

        indices = np.random.permutation(datapoints)
        minibatches = int(datapoints / batch_size)
        for minibatch in range(minibatches):
            train_idxs = indices[batch_start:batch_start + batch_size]
            # X_train_batch  = X_train[train_idxs,:]
            # Yr_train_batch = Y_train[train_idxs,:]
            # Yb_train_batch = Y_binarized_train[train_idxs, :]

            # train_output = train_fn(X_train_batch, Yr_train_batch, Yb_train_batch)
            train_output = train_fn(train_idxs)
            batch_start = batch_start + batch_size

            train_loss.append(train_output[0])
            pdb.set_trace()

            if enabledebug:
                # Debugging information
                batchIdx = epoch * minibatches + minibatch
                fn = 'params_{:>010d}'.format(batchIdx)  # saving params
                param_values = get_all_param_values(l_output)
                param_norm = np.linalg.norm(
                    np.hstack([
                        np.asarray(param).flatten() for param in param_values
                    ]))
                # gradients = get_grad(X_train_batch, Yr_train_batch, Yb_train_batch)
                gradients = get_grad(train_idxs)
                gradient_norm = np.linalg.norm(
                    np.hstack([
                        np.asarray(gradient).flatten()
                        for gradient in gradients
                    ]))
                logger.debug(
                    "Epoch : {:0>4}  minibatch {:0>3} Gradient Norm : {:>0.4}, Param Norm : {:>0.4} GradNorm/ParamNorm : {:>0.4} (Values from Prev. Minibatch) Train loss {}"
                    .format(epoch, minibatch, gradient_norm, param_norm,
                            gradient_norm / param_norm, train_loss[-1]))
                param_names = [
                    param.__str__() for param in get_all_params(l_output)
                ]
                np.savez(fn + '.npz', **dict(zip(param_names, param_values)))
                np.savez('Y_train_pred_{}.npz'.format(batchIdx),
                         Y_train_pred=train_output[1])
                if train_loss[-1] < train_loss_lowest:
                    train_loss_lowest = train_loss[-1]
                    np.savez('Y_train_pred_best.npz',
                             Y_train_pred=train_output[1])
                if np.isnan(gradient_norm):
                    pdb.set_trace()

        if (epoch % plotevery == 0):
            logger.info("Epoch {} of {}".format(epoch, epochs))

            fn = 'params_{:>03d}'.format(epoch)  # saving params
            param_values = get_all_param_values(l_output)
            param_norm = np.linalg.norm(
                np.hstack(
                    [np.asarray(param).flatten() for param in param_values]))
            param_names = [
                param.__str__() for param in get_all_params(l_output)
            ]
            if not enabledebug:
                np.savez(fn + '.npz', **dict(zip(param_names, param_values)))
                np.savez('Y_train_pred_{}.npz'.format(epoch),
                         Y_train_pred=train_output[1])
                mean_train_loss = np.mean(train_loss)
                if mean_train_loss < train_loss_lowest:
                    train_loss_lowest = mean_train_loss
                    np.savez('Y_train_pred_best.npz',
                             Y_train_pred=train_output[1])

            # gradients = get_grad(X_train_batch, Yr_train_batch, Yb_train_batch)
            gradients = get_grad(train_idxs)
            gradient_norm = np.linalg.norm(
                np.hstack([
                    np.asarray(gradient).flatten() for gradient in gradients
                ]))
            logger.info(
                "  Gradient Norm : {}, Param Norm : {} GradNorm/ParamNorm : {} "
                .format(gradient_norm, param_norm, gradient_norm / param_norm))
            logger.info("  Train loss {:>0.4}".format(mean_train_loss))

            # test_loss, test_prediction = val_fn(X_test, Y_test, Y_binarized_test)
            test_loss, test_prediction = val_fn()
            np.savez('Y_test_pred_{}.npz'.format(epoch),
                     Y_test_pred=test_prediction)
            logger.info("  Test loss {}".format(test_loss))
            if test_loss < test_loss_lowest:
                test_loss_lowest = test_loss
                np.savez('Y_test_pred_best.npz', Y_test_pred=test_prediction)
Ejemplo n.º 45
0
    ann = DenseLayer(ann, DIM_H1)  #, nonlinearity=sigmoid)
    ann = DenseLayer(ann, DIM_H2)  #, nonlinearity=sigmoid)
    ann = DenseLayer(ann, 2, nonlinearity=softmax)
    return ann


if __name__ == '__main__':

    x = T.fmatrix()
    t = T.fvector()
    ann = network(x)

    prediction = get_output(ann)[:, 1]
    predict = function([x], outputs=prediction)

    loss = binary_crossentropy(prediction, t).mean()

    # L2 regularization
    if L2_REGULARIZATION:
        l2_penalty = ALPHA * regularize_network_params(ann, l2)
        loss += l2_penalty.mean()

    updates = sgd(loss_or_grads=loss,
                  params=get_all_params(ann, trainable=True),
                  learning_rate=LR)
    train = function([x, t],
                     outputs=loss,
                     updates=updates,
                     allow_input_downcast=True,
                     mode='FAST_COMPILE')
Ejemplo n.º 46
0
def multi_task_classifier(args, input_var, target_var, wordEmbeddings, seqlen, num_feats, lambda_val = 0.5 * 1e-4):

    print("Building multi task model with 1D Convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    kw = 2
    num_filters = seqlen-kw+1
    stride = 1 
    filter_size=wordDim
    pool_size=num_filters

    input = InputLayer((None, seqlen, num_feats),input_var=input_var)
    batchsize, _, _ = input.input_var.shape
    emb = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    reshape = ReshapeLayer(emb, (batchsize, seqlen, num_feats*wordDim))
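    # unlike the earlier multi-task variant, all eight heads share a single
    # embedding/reshape; only the Conv1D -> max-pool -> dense -> softmax stacks differ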


    conv1d_1 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_1 = MaxPool1DLayer(conv1d_1, pool_size=pool_size)  
    hid_1 = DenseLayer(maxpool_1, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_1 = DenseLayer(hid_1, num_units=2, nonlinearity=softmax)


    conv1d_2 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_2 = MaxPool1DLayer(conv1d_2, pool_size=pool_size)  
    hid_2 = DenseLayer(maxpool_2, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_2 = DenseLayer(hid_2, num_units=4, nonlinearity=softmax)

    conv1d_3 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_3 = MaxPool1DLayer(conv1d_3, pool_size=pool_size)  
    hid_3 = DenseLayer(maxpool_3, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_3 = DenseLayer(hid_3, num_units=3, nonlinearity=softmax)

    conv1d_4 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_4 = MaxPool1DLayer(conv1d_4, pool_size=pool_size)  
    hid_4 = DenseLayer(maxpool_4, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_4 = DenseLayer(hid_4, num_units=3, nonlinearity=softmax)

    conv1d_5 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_5 = MaxPool1DLayer(conv1d_5, pool_size=pool_size)  
    hid_5 = DenseLayer(maxpool_5, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_5 = DenseLayer(hid_5, num_units=2, nonlinearity=softmax)

    conv1d_6 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_6 = MaxPool1DLayer(conv1d_6, pool_size=pool_size)  
    hid_6 = DenseLayer(maxpool_6, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_6 = DenseLayer(hid_6, num_units=4, nonlinearity=softmax)


    conv1d_7 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_7 = MaxPool1DLayer(conv1d_7, pool_size=pool_size)  
    hid_7 = DenseLayer(maxpool_7, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_7 = DenseLayer(hid_7, num_units=3, nonlinearity=softmax)

    conv1d_8 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_8 = MaxPool1DLayer(conv1d_8, pool_size=pool_size)  
    hid_8 = DenseLayer(maxpool_8, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_8 = DenseLayer(hid_8, num_units=3, nonlinearity=softmax)


    # Is this important?
    network_1_out, network_2_out, network_3_out, network_4_out, \
    network_5_out, network_6_out, network_7_out, network_8_out = \
    get_output([network_1, network_2, network_3, network_4, network_5, network_6, network_7, network_8])

    loss_1 = T.mean(binary_crossentropy(network_1_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_1:lambda_val, 
                hid_1:lambda_val, network_1:lambda_val} , l2)
    updates_1 = adagrad(loss_1, get_all_params(network_1, trainable=True), learning_rate=args.step)
    train_fn_1 = theano.function([input_var, target_var], loss_1, updates=updates_1, allow_input_downcast=True)
    val_acc_1 =  T.mean(binary_accuracy(get_output(network_1, deterministic=True), target_var))
    val_fn_1 = theano.function([input_var, target_var], val_acc_1, allow_input_downcast=True)


    loss_2 = T.mean(categorical_crossentropy(network_2_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_2:lambda_val, 
                hid_2:lambda_val, network_2:lambda_val} , l2)
    updates_2 = adagrad(loss_2, get_all_params(network_2, trainable=True), learning_rate=args.step)
    train_fn_2 = theano.function([input_var, target_var], loss_2, updates=updates_2, allow_input_downcast=True)
    val_acc_2 =  T.mean(categorical_accuracy(get_output(network_2, deterministic=True), target_var))
    val_fn_2 = theano.function([input_var, target_var], val_acc_2, allow_input_downcast=True)


    loss_3 = T.mean(categorical_crossentropy(network_3_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_3:lambda_val, 
                hid_3:lambda_val, network_3:lambda_val} , l2)
    updates_3 = adagrad(loss_3, get_all_params(network_3, trainable=True), learning_rate=args.step)
    train_fn_3 = theano.function([input_var, target_var], loss_3, updates=updates_3, allow_input_downcast=True)
    val_acc_3 =  T.mean(categorical_accuracy(get_output(network_3, deterministic=True), target_var))
    val_fn_3 = theano.function([input_var, target_var], val_acc_3, allow_input_downcast=True)


    loss_4 = T.mean(categorical_crossentropy(network_4_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_4:lambda_val, 
                hid_4:lambda_val, network_4:lambda_val} , l2)
    updates_4 = adagrad(loss_4, get_all_params(network_4, trainable=True), learning_rate=args.step)
    train_fn_4 = theano.function([input_var, target_var], loss_4, updates=updates_4, allow_input_downcast=True)
    val_acc_4 =  T.mean(categorical_accuracy(get_output(network_4, deterministic=True), target_var))
    val_fn_4 = theano.function([input_var, target_var], val_acc_4, allow_input_downcast=True)

    loss_5 = T.mean(binary_crossentropy(network_5_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_5:lambda_val, 
                hid_5:lambda_val, network_5:lambda_val} , l2)
    updates_5 = adagrad(loss_5, get_all_params(network_5, trainable=True), learning_rate=args.step)
    train_fn_5 = theano.function([input_var, target_var], loss_5, updates=updates_5, allow_input_downcast=True)
    val_acc_5 =  T.mean(binary_accuracy(get_output(network_5, deterministic=True), target_var))
    val_fn_5 = theano.function([input_var, target_var], val_acc_5, allow_input_downcast=True)

    loss_6 = T.mean(categorical_crossentropy(network_6_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_6:lambda_val, 
                hid_6:lambda_val, network_6:lambda_val} , l2)
    updates_6 = adagrad(loss_6, get_all_params(network_6, trainable=True), learning_rate=args.step)
    train_fn_6 = theano.function([input_var, target_var], loss_6, updates=updates_6, allow_input_downcast=True)
    val_acc_6 =  T.mean(categorical_accuracy(get_output(network_6, deterministic=True), target_var))
    val_fn_6 = theano.function([input_var, target_var], val_acc_6, allow_input_downcast=True)

    loss_7 = T.mean(categorical_crossentropy(network_7_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_7:lambda_val, 
                hid_7:lambda_val, network_7:lambda_val} , l2)
    updates_7 = adagrad(loss_7, get_all_params(network_7, trainable=True), learning_rate=args.step)
    train_fn_7 = theano.function([input_var, target_var], loss_7, updates=updates_7, allow_input_downcast=True)
    val_acc_7 =  T.mean(categorical_accuracy(get_output(network_7, deterministic=True), target_var))
    val_fn_7 = theano.function([input_var, target_var], val_acc_7, allow_input_downcast=True)

    loss_8 = T.mean(categorical_crossentropy(network_8_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_8:lambda_val, 
                hid_8:lambda_val, network_8:lambda_val} , l2)
    updates_8 = adagrad(loss_8, get_all_params(network_8, trainable=True), learning_rate=args.step)
    train_fn_8 = theano.function([input_var, target_var], loss_8, updates=updates_8, allow_input_downcast=True)
    val_acc_8 =  T.mean(categorical_accuracy(get_output(network_8, deterministic=True), target_var))
    val_fn_8 = theano.function([input_var, target_var], val_acc_8, allow_input_downcast=True)


    return train_fn_1, val_fn_1, network_1, train_fn_2, val_fn_2, network_2, train_fn_3, val_fn_3, \
            network_3, train_fn_4, val_fn_4, network_4, train_fn_5, val_fn_5, network_5, \
            train_fn_6, val_fn_6, network_6, train_fn_7, val_fn_7, network_7, train_fn_8, val_fn_8, network_8
Ejemplo n.º 47
0
    def __init__(self,
                 load_weights = True,
                 is_training  = True,
                 model_name   = 'dronet_weights.npz'):

        self.model_name = os.path.join(os.path.dirname(os.path.realpath(__file__)), model_name)

        def network(image):
            input_image = InputLayer(input_var = image,
                                     shape     = (None, 1, 120, 160))

            conv1       = Conv2DLayer(input_image,
                                      num_filters  = 32,
                                      filter_size  = (5,5),
                                      stride       = (2,2),
                                      nonlinearity = rectify,
                                      pad          = 'same')

            pool1       = MaxPool2DLayer(conv1,
                                         pool_size = (3,3),
                                         stride = (2,2),
                                         pad = 1)

            conv2       = batch_norm(Conv2DLayer(pool1,
                                                 num_filters  = 32,
                                                 filter_size  = (3,3),
                                                 stride       = (2,2),
                                                 nonlinearity = rectify,
                                                 pad          = 'same'))

            conv2       = batch_norm(Conv2DLayer(conv2,
                                                 num_filters  = 32,
                                                 filter_size  = (3,3),
                                                 stride       = (1,1),
                                                 nonlinearity = rectify,
                                                 pad          = 'same'))

            downsample1 = Conv2DLayer(pool1,
                                      num_filters  = 32,
                                      filter_size  = (1,1),
                                      stride       = (2,2),
                                      nonlinearity = rectify,
                                      pad          = 'same')

            input3      = ElemwiseSumLayer([downsample1,
                                            conv2])

            conv3       = batch_norm(Conv2DLayer(input3,
                                                 num_filters  = 64,
                                                 filter_size  = (3,3),
                                                 stride       = (2,2),
                                                 nonlinearity = rectify,
                                                 pad          = 'same'))

            conv3       = batch_norm(Conv2DLayer(conv3,
                                                 num_filters  = 64,
                                                 filter_size  = (3,3),
                                                 stride       = (1,1),
                                                 nonlinearity = rectify,
                                                 pad          = 'same'))

            downsample2 = Conv2DLayer(input3,
                                      num_filters  = 64,
                                      filter_size  = (1,1),
                                      stride       = (2,2),
                                      nonlinearity = rectify,
                                      pad          = 'same')

            input4      = ElemwiseSumLayer([downsample2,
                                            conv3])

            conv4       = batch_norm(Conv2DLayer(input4,
                                                 num_filters  = 128,
                                                 filter_size  = (3,3),
                                                 stride       = (2,2),
                                                 nonlinearity = rectify,
                                                 pad          = 'same'))

            conv4       = batch_norm(Conv2DLayer(conv4,
                                                 num_filters  = 128,
                                                 filter_size  = (3,3),
                                                 stride       = (1,1),
                                                 nonlinearity = rectify,
                                                 pad          = 'same'))

            downsample3 = Conv2DLayer(input4,
                                      num_filters  = 128,
                                      filter_size  = (1,1),
                                      stride       = (2,2),
                                      nonlinearity = rectify,
                                      pad          = 'same')

            input5      = ElemwiseSumLayer([downsample3,
                                            conv4])


            flatten     = DropoutLayer(FlattenLayer(input5), 0.5)

            prob_out    = DenseLayer(flatten,
                                     num_units    = 1,
                                     nonlinearity = sigmoid)

            turn_angle  = DenseLayer(flatten,
                                     num_units    = 1,
                                     nonlinearity = tanh)

            return prob_out, turn_angle


        # declare the variables used in the network
        self.X = T.ftensor4()
        self.Y = T.fmatrix()
        self.Z = T.fmatrix()

        # Lasagne object for the network
        self.CollisionProbability, self.TurnAngle = network(self.X)

        if is_training:
            # collision probability for training and testing.
            # Output is a theano object.
            self.collision_prob      = get_output(self.CollisionProbability)
            self.collision_prob_test = get_output(self.CollisionProbability, deterministic=True)


            # turn angle for training and testing.
            # Output is a theano object.
            self.turn_angle      = get_output(self.TurnAngle)
            self.turn_angle_test = get_output(self.TurnAngle, deterministic=True)


            # Loss for the network.
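            # Collision probability is a binary classification target (cross-entropy),
            # while the turn angle is a regression target (mean squared error).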
            self.collision_loss = binary_crossentropy(self.collision_prob, self.Y).mean()
            self.turn_loss      = squared_error(self.turn_angle, self.Z).mean()


            # Loss to call for testing and validation.
            self.test_collision_loss = binary_crossentropy(self.collision_prob_test, self.Y).mean()
            self.test_turn_loss      = squared_error(self.turn_angle_test, self.Z).mean()

            # network parameters for training.
            self.collision_params = get_all_params(self.CollisionProbability, trainable=True)
            self.turn_params = get_all_params(self.TurnAngle, trainable=True)


            # network updates
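            # Adam, with a much smaller learning rate for the turn-angle head
            # (5e-5) than for the collision head (1e-3).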
            self.collision_updates = adam(self.collision_loss,
                                          self.collision_params,
                                          learning_rate = 0.001)

            self.turn_updates = adam(self.turn_loss,
                                     self.turn_params,
                                     learning_rate = 0.00005)


            # get test loss
            self.test_collision = theano.function(inputs               = [self.X, self.Y],
                                                  outputs              = self.test_collision_loss,
                                                  allow_input_downcast = True)

            self.test_turn = theano.function(inputs               = [self.X, self.Z],
                                             outputs              = self.test_turn_loss,
                                             allow_input_downcast = True)



            # training functions
            self.train_collision = theano.function(inputs               = [self.X, self.Y],
                                                   outputs              = self.collision_loss,
                                                   updates              = self.collision_updates,
                                                   allow_input_downcast = True)

            self.train_turn = theano.function(inputs               = [self.X, self.Z],
                                              outputs              = self.turn_loss,
                                              updates              = self.turn_updates,
                                              allow_input_downcast = True)

        else:
            # collision probability for testing.
            # Output is a theano object.
            self.collision_prob_test = get_output(self.CollisionProbability, deterministic=True)

            # turn angle for testing.
            # Output is a theano object.
            self.turn_angle_test = get_output(self.TurnAngle, deterministic=True)



        # run the network to calculate collision probability
        # and turn angle given an input.
        self.dronet = theano.function(inputs = [self.X],
                                      outputs = [self.turn_angle_test,
                                                 self.collision_prob_test],
                                      allow_input_downcast = True)

        def load():
            with np.load(self.model_name) as f:
                param_values = [f['arr_%d' % i] for i in range(len(f.files))]
            set_all_param_values([self.CollisionProbability,
                                  self.TurnAngle], param_values)

        if load_weights:
            load()
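
A minimal usage sketch of the class above. The class name DroNet is an assumption (the class header is not shown in this snippet); only the network, the dronet inference function, and dronet_weights.npz come from the code.

import numpy as np

# build the network in inference mode and load the pretrained weights
net = DroNet(load_weights=True, is_training=False)

# one grayscale frame, shaped (batch, channels, height, width) = (1, 1, 120, 160)
frame = np.zeros((1, 1, 120, 160), dtype=np.float32)

turn_angle, collision_prob = net.dronet(frame)
print('steering:', float(turn_angle[0, 0]), 'collision prob:', float(collision_prob[0, 0]))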