Example #1
def add_shallow_conv_maxpool(network):
    regularization = 0
    filter_size = (3, 3, 3)

    network = lasagne.layers.dnn.Conv3DDNNLayer(
        incoming=network,
        pad='same',
        num_filters=32,
        filter_size=filter_size,
        nonlinearity=lasagne.nonlinearities.leaky_rectify)
    l2_penalty = regularize_layer_params_weighted({network: 0.2}, l2)
    regularization += l2_penalty
    network = lasagne.layers.dnn.MaxPool3DDNNLayer(incoming=network,
                                                   pool_size=(2, 2, 2),
                                                   stride=2)

    network = lasagne.layers.dnn.Conv3DDNNLayer(
        incoming=network,
        pad='same',
        num_filters=64,
        filter_size=filter_size,
        nonlinearity=lasagne.nonlinearities.leaky_rectify)
    l2_penalty = regularize_layer_params_weighted({network: 0.2}, l2)
    regularization += l2_penalty
    network = lasagne.layers.dnn.MaxPool3DDNNLayer(incoming=network,
                                                   pool_size=(2, 2, 2),
                                                   stride=2)

    return network, regularization
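A minimal usage sketch for the helper above (the input shape and the downstream handling of the penalty are assumptions, not part of the original example); the returned expression is meant to be added to the data loss before computing parameter updates:

import lasagne

# Assumed volumetric input: (batch, channels, depth, height, width).
net = lasagne.layers.InputLayer(shape=(None, 1, 32, 32, 32))
net, l2_terms = add_shallow_conv_maxpool(net)
# l2_terms is a Theano scalar expression, e.g. loss = data_loss + l2_terms.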
Example #2
    def build_layer(self, model, all_l1_regs, all_l2_regs):
        model = DenseLayer(model,
                           num_units=self.n_hidden,
                           nonlinearity=utils.get_non_linearity(self.non_linearity))
        if self.l1_reg != 0:
            all_l1_regs += regularize_layer_params_weighted({model: self.l1_reg}, l1)

        if self.l2_reg != 0:
            all_l2_regs += regularize_layer_params_weighted({model: self.l2_reg}, l2)

        if self.batch_norm == "Y":
            model = batch_norm(model)
        if self.dropout_p != 0:
            model = DropoutLayer(model, p=self.dropout_p)
        return model, all_l1_regs, all_l2_regs
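A sketch of how a method like this is usually chained (the layer_configs list, input_layer, and data_loss names are illustrative assumptions); the two penalty accumulators are threaded through each call and added to the objective once at the end:

# Sketch: stack several configured layers while accumulating L1/L2 penalties.
model, all_l1, all_l2 = input_layer, 0, 0
for cfg in layer_configs:  # each cfg carries n_hidden, l1_reg, l2_reg, batch_norm, dropout_p
    model, all_l1, all_l2 = cfg.build_layer(model, all_l1, all_l2)
loss = data_loss + all_l1 + all_l2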
def build_cnn(input_var=None, batch_size=None):

    # Input layer, as usual:
    network = lasagne.layers.InputLayer(shape=(None, 32, 32, 32),
                                        input_var=input_var)

    repeatInput = Repeat(network, 40)

    network = lasagne.layers.ReshapeLayer(repeatInput, (-1, 1, 32, 32, 32))

    network_transformed = AffineTransformation3DLayer(network, batch_size * 40)

    network = lasagne.layers.ReshapeLayer(network_transformed, (-1, 32, 32, 32))

    network = Conv2DLayer(
            network, num_filters=5, filter_size=(1, 1),
            # nonlinearity=lasagne.nonlinearities.sigmoid,
            nonlinearity=lasagne.nonlinearities.identity,
            W=lasagne.init.GlorotUniform())

    network = lasagne.layers.BatchNormLayer(network)

    network = lasagne.layers.NonlinearityLayer(network, nonlinearity=lasagne.nonlinearities.rectify)

    network = Conv2DLayer(
            network, num_filters=5, filter_size=(1, 1),
            nonlinearity=lasagne.nonlinearities.identity,
            W=lasagne.init.GlorotUniform())

    network = lasagne.layers.NonlinearityLayer(network, nonlinearity=lasagne.nonlinearities.rectify)

    network = lasagne.layers.BatchNormLayer(network)

    network = Conv2DLayer(
            network, num_filters=1, filter_size=(1, 1),
            nonlinearity=lasagne.nonlinearities.identity,
            W=lasagne.init.GlorotUniform())
    network = lasagne.layers.BatchNormLayer(network)
    network = lasagne.layers.NonlinearityLayer(network, nonlinearity=lasagne.nonlinearities.rectify)

    network = NIN_block(network, 5, (128, 96, 96))
    network = MaxPool2DLayer(network, pool_size=(2, 2), stride=(2, 2))
    network = lasagne.layers.dropout(network, 0.5)
    network = NIN_block(network, 5, (128, 96, 96))
    network = MaxPool2DLayer(network, pool_size=(2, 2), stride=(2, 2))
    network = lasagne.layers.dropout(network, 0.5)
    network = NIN_block(network, 3, (128, 128, 40))
    network = MaxPool2DLayer(network, pool_size=(8, 8), stride=(1, 1))

    fc2 = lasagne.layers.DenseLayer(
              network,
              num_units=40,
              nonlinearity=lasagne.nonlinearities.identity)

    fc2_selected = SelectLayer(fc2, 40)

    weight_decay_layers = {network: 0.0, fc2: 0.002}
    l2_penalty = regularize_layer_params_weighted(weight_decay_layers, l2)
    return fc2, fc2_selected, l2_penalty, network_transformed
def build_cnn(input_var=None, batch_size=None):

    # Input layer, as usual:
    network = lasagne.layers.InputLayer(shape=(None, 1, 40, 40, 40),
                                        input_var=input_var)
    network = lasagne.layers.BatchNormLayer(network)
    repeatInput = Repeat(network, 10)

    network = lasagne.layers.ReshapeLayer(repeatInput, (-1, 1, 40, 40, 40))

    network_transformed = AffineTransformation3DLayer(network, batch_size * 10)

    network_transformed_average = lasagne.layers.ExpressionLayer(
        network_transformed, lambda X: X.max(-1), output_shape='auto')

    network = Conv2DLayer(
            network_transformed_average, num_filters=32, filter_size=(5, 5),
            nonlinearity=lasagne.nonlinearities.rectify,
            W=lasagne.init.GlorotUniform())

    # Max-pooling layer of factor 2 in both dimensions:
    network = MaxPool2DLayer(network, pool_size=(2, 2))

    # Another convolution with 32 5x5 kernels, and another 2x2 pooling:
    network = Conv2DLayer(
            network, num_filters=32, filter_size=(5, 5),
            nonlinearity=lasagne.nonlinearities.rectify,
            W=lasagne.init.GlorotUniform()
            # nonlinearity=lasagne.nonlinearities.sigmoid
            )
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))

    # A fully-connected layer of 256 units with 50% dropout on its inputs:
    fc1 = lasagne.layers.DenseLayer(
            lasagne.layers.dropout(network, p=.5),
            # network,
            num_units=256,
            # nonlinearity=lasagne.nonlinearities.sigmoid
            nonlinearity=lasagne.nonlinearities.rectify,
            )

    # And, finally, the 10-unit output layer with 50% dropout on its inputs:
    fc2 = lasagne.layers.DenseLayer(
            lasagne.layers.dropout(fc1, p=.5),
            nonlinearity=lasagne.nonlinearities.identity,
            num_units=10,
            )

    network_transformed = lasagne.layers.ReshapeLayer(
        network_transformed_average, (-1, 10, 40, 40))

    fc2_selected = SelectLayer(fc2, 10)

    weight_decay_layers = {fc1: 0.0, fc2: 0.002}
    l2_penalty = regularize_layer_params_weighted(weight_decay_layers, l2)

    return fc2, fc2_selected, l2_penalty, network_transformed
Example #5
def add_dense_layers(network, n_layers, n_units):
    regularization = 0
    for i in range(0, n_layers):
        network = lasagne.layers.DenseLayer(
            incoming=network,
            num_units=n_units,
            nonlinearity=lasagne.nonlinearities.leaky_rectify)
        l2_penalty = regularize_layer_params_weighted({network: 0.2}, l2)
        regularization += l2_penalty
    return network, regularization
def build_network_single_lstm(
    args, input1_var, input1_mask_var, input2_var, input2_mask_var, wordEmbeddings, maxlen=36, reg=0.5 * 1e-4
):

    print ("Building model with single lstm")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]
    GRAD_CLIP = wordDim
    input_1 = InputLayer((None, maxlen), input_var=input1_var)
    batchsize, seqlen = input_1.input_var.shape
    input_1_mask = InputLayer((None, maxlen), input_var=input1_mask_var)
    emb_1 = EmbeddingLayer(input_1, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    emb_1.params[emb_1.W].remove("trainable")

    lstm_1 = LSTMLayer(
        emb_1, num_units=args.lstmDim, mask_input=input_1_mask, grad_clipping=GRAD_CLIP, nonlinearity=tanh
    )

    slice_1 = SliceLayer(lstm_1, indices=-1, axis=1)  # out_shape (None, args.lstmDim)

    input_2 = InputLayer((None, maxlen), input_var=input2_var)
    input_2_mask = InputLayer((None, maxlen), input_var=input2_mask_var)
    emb_2 = EmbeddingLayer(input_2, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    emb_2.params[emb_2.W].remove("trainable")

    lstm_2 = LSTMLayer(
        emb_2, num_units=args.lstmDim, mask_input=input_2_mask, grad_clipping=GRAD_CLIP, nonlinearity=tanh
    )

    slice_2 = SliceLayer(lstm_2, indices=-1, axis=1)

    mul = ElemwiseMergeLayer([slice_1, slice_2], merge_function=T.mul)
    sub = AbsSubLayer([slice_1, slice_2], merge_function=T.sub)
    concat = ConcatLayer([mul, sub])

    hid = DenseLayer(concat, num_units=args.hiddenDim, nonlinearity=sigmoid)

    if args.task == "sts":
        network = DenseLayer(hid, num_units=5, nonlinearity=logsoftmax)

    elif args.task == "ent":
        network = DenseLayer(hid, num_units=3, nonlinearity=logsoftmax)

    layers = {lstm_1: reg, hid: reg, network: reg}
    penalty = regularize_layer_params_weighted(layers, l2)

    input_dict = {
        input_1: input1_var,
        input_2: input2_var,
        input_1_mask: input1_mask_var,
        input_2_mask: input2_mask_var,
    }

    return network, penalty, input_dict
def build_cnn(input_var=None):

    # Input layer, as usual:
    network = lasagne.layers.InputLayer(shape=(None, 1, 40, 40),
                                        input_var=input_var)
    # This time we do not apply input dropout, as it tends to work less well
    # for convolutional layers.

    # Convolutional layer with 32 kernels of size 5x5. Strided and padded
    # convolutions are supported as well; see the docstring.
    network = Conv2DLayer(
            network, num_filters=32, filter_size=(5, 5),
            #nonlinearity=lasagne.nonlinearities.sigmoid,
            nonlinearity=lasagne.nonlinearities.rectify,
            W=lasagne.init.GlorotUniform())
    # Expert note: Lasagne provides alternative convolutional layers that
    # override Theano's choice of which implementation to use; for details
    # please see http://lasagne.readthedocs.org/en/latest/user/tutorial.html.

    # Max-pooling layer of factor 2 in both dimensions:
    network = MaxPool2DLayer(network, pool_size=(2, 2))

    # Another convolution with 32 5x5 kernels, and another 2x2 pooling:
    network = Conv2DLayer(
            network, num_filters=32, filter_size=(5, 5),
            nonlinearity=lasagne.nonlinearities.rectify,
            W = lasagne.init.GlorotUniform()
            #nonlinearity=lasagne.nonlinearities.sigmoid
            )
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))
    

    # A fully-connected layer of 256 units with 50% dropout on its inputs:
    fc1  = lasagne.layers.DenseLayer(
            lasagne.layers.dropout(network, p=.5),
            #network,
            num_units=256,
            #nonlinearity=lasagne.nonlinearities.sigmoid
            nonlinearity=lasagne.nonlinearities.rectify,
            )

    # And, finally, the 10-unit output layer with 50% dropout on its inputs:
    fc2  = lasagne.layers.DenseLayer(
            lasagne.layers.dropout(fc1, p=.5),
            #network,
            num_units=10,
            #nonlinearity=lasagne.nonlinearities.softmax
            nonlinearity=lasagne.nonlinearities.sigmoid
            )
    
    weight_decay_layers = {fc1:0.0, fc2:0.002}
    l2_penalty = regularize_layer_params_weighted(weight_decay_layers, l2)

    return fc2, l2_penalty
    def test_regularize_layer_params_weighted(self, layers):
        from lasagne.regularization import regularize_layer_params_weighted
        from lasagne.regularization import apply_penalty, l2
        l_1, l_2, l_3 = layers

        layers = OrderedDict()
        layers[l_2] = 0.1
        layers[l_3] = 0.5

        loss = regularize_layer_params_weighted(layers,
                                                lasagne.regularization.l2)
        assert equal_computations([loss],
                                  [sum([0.1 * apply_penalty([l_2.W], l2),
                                        0.5 * apply_penalty([l_3.W], l2)])])
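As the assertion above spells out, the weighted helper is simply a weighted sum of per-layer penalties; an equivalent hand-rolled sketch using the unweighted helper (same layers OrderedDict as in the test) would be:

from lasagne.regularization import regularize_layer_params, l2

# 0.1 * L2(l_2.W) + 0.5 * L2(l_3.W), written out layer by layer.
manual_loss = sum(coeff * regularize_layer_params(layer, l2)
                  for layer, coeff in layers.items())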
Example #10
def initNetwork(X, Y, config):
    alexNetModel = alexNet2(config, X)
    #network = lasagne.layers.FlattenLayer(alexNetModel.outLayer)
    network = lasagne.layers.DropoutLayer(alexNetModel.outLayer, p=config['prob_drop'], rescale=False)  #dropout
    wtFileName = config['weightsDir'] + 'W_5.npy'; bFileName = config['weightsDir'] + 'b_5.npy'
    network = lasagne.layers.DenseLayer(network, num_units=31, W=getClassifierParam(wtFileName, False), b=getClassifierParam(bFileName, True), nonlinearity=lasagne.nonlinearities.softmax)  #if classifier weights are not present, init with random weights

    regMult = [float(i) for i in config['regularize'].split(',')]    #read off a line like :regularize: 0.1,0.1,0.1,0.1,0.1,0.1 from the config.yaml file
    layersRegMultiplier = {alexNetModel.layers[layerId]:regMult[layerId] for layerId in range(len(alexNetModel.layers))}
    layersRegMultiplier[network] = regMult[-1]
    l2_penalty = regularize_layer_params_weighted(layersRegMultiplier, l2)

    prediction = lasagne.layers.get_output(network, deterministic=True)
    lossAll = lasagne.objectives.categorical_crossentropy(prediction, Y)  #loss function
    loss = lossAll.mean()
    loss = loss + l2_penalty

    accuracy = T.mean(T.eq(T.argmax(prediction, axis=1), Y), dtype=theano.config.floatX)
    match = T.eq(T.argmax(prediction, axis=1), Y)
    params = lasagne.layers.get_all_params(network, trainable=True)
    return [loss, params, accuracy, match]
Example #11
def l2_network(input, n_outputs, last_nonlinearity):
    """
    l2_network is a shallow network with L2-norm regularization on all ConvLayers
    and DenseLayers (corresponds to a gaussian prior assumption on all weights).

    Usage::
        >>> import theano.tensor as T
        >>> from lasagne.layers import InputLayer
        >>> from lasagne.nonlinearities import sigmoid
        >>>
        >>> inputs = T.tensor5("inputs")
        >>> input_layer = InputLayer(input_var=inputs, shape=(None, 2, 32, 32, 32))
        >>> n_classes = 2
        >>> # apply the network
        >>> output_layer, l2_terms = l2_network(input_layer, n_classes, sigmoid)

    :param input: a lasagne layer, on top of which the network is applied
    :param n_outputs: number of output units in the last layer
    :param last_nonlinearity: what the non-linearity in the last layer should be
    :return: the last lasagne layer of the network, and L2 regularization terms
            if there are any (otherwise 0).
    """
    regularization = 0
    network = input

    # add deep convolutional structure
    network, penalty = add_shallow_conv_maxpool(network)
    regularization += penalty

    # add deep dense fully connected layers
    network, penalty = add_dense_layers(network, n_layers=1, n_units=256)
    regularization += penalty

    # add the output layer non-linearity
    network = lasagne.layers.DenseLayer(incoming=network,
                                        num_units=n_outputs,
                                        nonlinearity=last_nonlinearity)
    l2_penalty = regularize_layer_params_weighted({network: 0.2}, l2)
    regularization += l2_penalty
    return network, regularization
Example #12
def initNetwork(X, Y, config):
    alexNetModel = alexNet2(config, X)
    #network = lasagne.layers.FlattenLayer(alexNetModel.outLayer)
    network = lasagne.layers.DropoutLayer(alexNetModel.outLayer,
                                          p=config['prob_drop'],
                                          rescale=False)  #dropout
    wtFileName = config['weightsDir'] + 'W_5.npy'
    bFileName = config['weightsDir'] + 'b_5.npy'
    network = lasagne.layers.DenseLayer(
        network,
        num_units=31,
        W=getClassifierParam(wtFileName, False),
        b=getClassifierParam(bFileName, True),
        nonlinearity=lasagne.nonlinearities.softmax
    )  #if classifier weights are not present, init with random weights

    regMult = [
        float(i) for i in config['regularize'].split(',')
    ]  #read off a line like :regularize: 0.1,0.1,0.1,0.1,0.1,0.1 from the config.yaml file
    layersRegMultiplier = {
        alexNetModel.layers[layerId]: regMult[layerId]
        for layerId in range(len(alexNetModel.layers))
    }
    layersRegMultiplier[network] = regMult[-1]
    l2_penalty = regularize_layer_params_weighted(layersRegMultiplier, l2)

    prediction = lasagne.layers.get_output(network, deterministic=True)
    lossAll = lasagne.objectives.categorical_crossentropy(prediction,
                                                          Y)  #loss function
    loss = lossAll.mean()
    loss = loss + l2_penalty

    accuracy = T.mean(T.eq(T.argmax(prediction, axis=1), Y),
                      dtype=theano.config.floatX)
    match = T.eq(T.argmax(prediction, axis=1), Y)
    params = lasagne.layers.get_all_params(network, trainable=True)
    return [loss, params, accuracy, match]
    def __init_model(self):
        """Initializes the model and compiles the network

        For the most part, this consists of setting up some bookkeeping
        for theano and lasagne, and compiling the theano functions
        """
        logging.info('initializing model')
        if self.Xshape is None or self.yshape is None:
            if self.Xshape is None:
                logging.warning("Tried to compile Neural Net before"
                                " setting input dimensionality")
            if self.yshape is None:
                logging.warning("Tried to compile Neural Net before"
                                " setting output dimensionality")
            raise ShapeError(self.Xshape, self.yshape)

        # These are theano/lasagne symbolic variable declarations,
        # representing the target vector (traces)
        target_vector = T.fmatrix('y')
        # our predictions
        predictions = lasagne.layers.get_output(self.layer_out)
        validation_predictions = lasagne.layers.get_output(self.layer_out,
                                                           deterministic=True)
        # the loss (diff in objective) for training
        # using MSE
        stochastic_loss = lasagne.objectives.squared_error(
            predictions, target_vector).mean()
        #print(stochastic_loss)
        deterministic_loss = lasagne.objectives.squared_error(
            validation_predictions, target_vector).mean()
        # using cross entropy
        #stochastic_loss = lasagne.objectives.categorical_crossentropy(predictions, target_vector).mean()
        # the loss for validation
        #deterministic_loss = lasagne.objectives.categorical_crossentropy(test_predictions, target_vector).mean()
        # calculate loss
        loss = stochastic_loss
        # should regularization be used?
        config = self.config
        if config:
            if config.l1_regularization:
                logging.info("Using L1 regularization")
                l1_penalty = regularize_layer_params(self.layer_out, l1) * 1e-4
                loss += l1_penalty
            if config.l2_regularization:
                logging.info("Using L2 regularization with weights")
                for sublayer in self.layer_in:
                    logging.info("\tinput layer ({1}) weight: {0}".format(
                        self.layer_weights[sublayer], sublayer.name))
                logging.info("\toutput layer weight: {0}".format(
                    self.layer_weights[self.layer_out]))
                l2_penalty = regularize_layer_params_weighted(
                    self.layer_weights, l2)
                loss += l2_penalty
        else:
            logging.info("No regularization")
        # the network parameters (i.e. weights)
        all_params = lasagne.layers.get_all_params(self.layer_out)
        # how to update the weights
        updates = lasagne.updates.nesterov_momentum(loss_or_grads=loss,
                                                    params=all_params,
                                                    learning_rate=0.1,
                                                    momentum=0.9)

        # The theano functions for training, validating, and tracing.
        #   These get method-level wrappers below
        logging.info('compiling theano functions')
        self._train_fn = theano.function(
            on_unused_input='warn',
            inputs=[l.input_var for l in self.layer_in] + [target_vector],
            outputs=[stochastic_loss],
            updates=updates)
        self._valid_fn = theano.function(
            on_unused_input='warn',
            inputs=[l.input_var for l in self.layer_in] + [target_vector],
            outputs=[deterministic_loss, validation_predictions])
        self._trace_fn = theano.function(
            on_unused_input='warn',
            inputs=[l.input_var for l in self.layer_in],
            outputs=[
                validation_predictions * self.roi.shape[0] + self.roi.offset[0]
            ])
def multi_task_classifier(args, input_var, target_var, wordEmbeddings, seqlen, num_feats, lambda_val = 0.5 * 1e-4):

    print("Building multi task model with 1D Convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    kw = 2
    num_filters = seqlen-kw+1
    stride = 1 
    filter_size=wordDim
    pool_size=num_filters

    input = InputLayer((None, seqlen, num_feats),input_var=input_var)
    batchsize, _, _ = input.input_var.shape
    emb = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    reshape = ReshapeLayer(emb, (batchsize, seqlen, num_feats*wordDim))


    conv1d_1 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_1 = MaxPool1DLayer(conv1d_1, pool_size=pool_size)  
    hid_1 = DenseLayer(maxpool_1, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_1 = DenseLayer(hid_1, num_units=2, nonlinearity=softmax)


    conv1d_2 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_2 = MaxPool1DLayer(conv1d_2, pool_size=pool_size)  
    hid_2 = DenseLayer(maxpool_2, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_2 = DenseLayer(hid_2, num_units=4, nonlinearity=softmax)

    conv1d_3 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_3 = MaxPool1DLayer(conv1d_3, pool_size=pool_size)  
    hid_3 = DenseLayer(maxpool_3, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_3 = DenseLayer(hid_3, num_units=3, nonlinearity=softmax)

    conv1d_4 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_4 = MaxPool1DLayer(conv1d_4, pool_size=pool_size)  
    hid_4 = DenseLayer(maxpool_4, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_4 = DenseLayer(hid_4, num_units=3, nonlinearity=softmax)

    conv1d_5 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_5 = MaxPool1DLayer(conv1d_5, pool_size=pool_size)  
    hid_5 = DenseLayer(maxpool_5, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_5 = DenseLayer(hid_5, num_units=2, nonlinearity=softmax)

    conv1d_6 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_6 = MaxPool1DLayer(conv1d_6, pool_size=pool_size)  
    hid_6 = DenseLayer(maxpool_6, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_6 = DenseLayer(hid_6, num_units=4, nonlinearity=softmax)


    conv1d_7 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_7 = MaxPool1DLayer(conv1d_7, pool_size=pool_size)  
    hid_7 = DenseLayer(maxpool_7, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_7 = DenseLayer(hid_7, num_units=3, nonlinearity=softmax)

    conv1d_8 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_8 = MaxPool1DLayer(conv1d_8, pool_size=pool_size)  
    hid_8 = DenseLayer(maxpool_8, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_8 = DenseLayer(hid_8, num_units=3, nonlinearity=softmax)


    # Is this important?
    network_1_out, network_2_out, network_3_out, network_4_out, \
    network_5_out, network_6_out, network_7_out, network_8_out = \
    get_output([network_1, network_2, network_3, network_4, network_5, network_6, network_7, network_8])

    loss_1 = T.mean(binary_crossentropy(network_1_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_1:lambda_val, 
                hid_1:lambda_val, network_1:lambda_val} , l2)
    updates_1 = adagrad(loss_1, get_all_params(network_1, trainable=True), learning_rate=args.step)
    train_fn_1 = theano.function([input_var, target_var], loss_1, updates=updates_1, allow_input_downcast=True)
    val_acc_1 =  T.mean(binary_accuracy(get_output(network_1, deterministic=True), target_var))
    val_fn_1 = theano.function([input_var, target_var], val_acc_1, allow_input_downcast=True)


    loss_2 = T.mean(categorical_crossentropy(network_2_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_2:lambda_val, 
                hid_2:lambda_val, network_2:lambda_val} , l2)
    updates_2 = adagrad(loss_2, get_all_params(network_2, trainable=True), learning_rate=args.step)
    train_fn_2 = theano.function([input_var, target_var], loss_2, updates=updates_2, allow_input_downcast=True)
    val_acc_2 =  T.mean(categorical_accuracy(get_output(network_2, deterministic=True), target_var))
    val_fn_2 = theano.function([input_var, target_var], val_acc_2, allow_input_downcast=True)


    loss_3 = T.mean(categorical_crossentropy(network_3_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_3:lambda_val, 
                hid_3:lambda_val, network_3:lambda_val} , l2)
    updates_3 = adagrad(loss_3, get_all_params(network_3, trainable=True), learning_rate=args.step)
    train_fn_3 = theano.function([input_var, target_var], loss_3, updates=updates_3, allow_input_downcast=True)
    val_acc_3 =  T.mean(categorical_accuracy(get_output(network_3, deterministic=True), target_var))
    val_fn_3 = theano.function([input_var, target_var], val_acc_3, allow_input_downcast=True)


    loss_4 = T.mean(categorical_crossentropy(network_4_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_4:lambda_val, 
                hid_4:lambda_val, network_4:lambda_val} , l2)
    updates_4 = adagrad(loss_4, get_all_params(network_4, trainable=True), learning_rate=args.step)
    train_fn_4 = theano.function([input_var, target_var], loss_4, updates=updates_4, allow_input_downcast=True)
    val_acc_4 =  T.mean(categorical_accuracy(get_output(network_4, deterministic=True), target_var))
    val_fn_4 = theano.function([input_var, target_var], val_acc_4, allow_input_downcast=True)

    loss_5 = T.mean(binary_crossentropy(network_5_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_5:lambda_val, 
                hid_5:lambda_val, network_5:lambda_val} , l2)
    updates_5 = adagrad(loss_5, get_all_params(network_5, trainable=True), learning_rate=args.step)
    train_fn_5 = theano.function([input_var, target_var], loss_5, updates=updates_5, allow_input_downcast=True)
    val_acc_5 =  T.mean(binary_accuracy(get_output(network_5, deterministic=True), target_var))
    val_fn_5 = theano.function([input_var, target_var], val_acc_5, allow_input_downcast=True)

    loss_6 = T.mean(categorical_crossentropy(network_6_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_6:lambda_val, 
                hid_6:lambda_val, network_6:lambda_val} , l2)
    updates_6 = adagrad(loss_6, get_all_params(network_6, trainable=True), learning_rate=args.step)
    train_fn_6 = theano.function([input_var, target_var], loss_6, updates=updates_6, allow_input_downcast=True)
    val_acc_6 =  T.mean(categorical_accuracy(get_output(network_6, deterministic=True), target_var))
    val_fn_6 = theano.function([input_var, target_var], val_acc_6, allow_input_downcast=True)

    loss_7 = T.mean(categorical_crossentropy(network_7_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_7:lambda_val, 
                hid_7:lambda_val, network_7:lambda_val} , l2)
    updates_7 = adagrad(loss_7, get_all_params(network_7, trainable=True), learning_rate=args.step)
    train_fn_7 = theano.function([input_var, target_var], loss_7, updates=updates_7, allow_input_downcast=True)
    val_acc_7 =  T.mean(categorical_accuracy(get_output(network_7, deterministic=True), target_var))
    val_fn_7 = theano.function([input_var, target_var], val_acc_7, allow_input_downcast=True)

    loss_8 = T.mean(categorical_crossentropy(network_8_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_8:lambda_val, 
                hid_8:lambda_val, network_8:lambda_val} , l2)
    updates_8 = adagrad(loss_8, get_all_params(network_8, trainable=True), learning_rate=args.step)
    train_fn_8 = theano.function([input_var, target_var], loss_8, updates=updates_8, allow_input_downcast=True)
    val_acc_8 =  T.mean(categorical_accuracy(get_output(network_8, deterministic=True), target_var))
    val_fn_8 = theano.function([input_var, target_var], val_acc_8, allow_input_downcast=True)


    return train_fn_1, val_fn_1, network_1, train_fn_2, val_fn_2, network_2, train_fn_3, val_fn_3, \
            network_3, train_fn_4, val_fn_4, network_4, train_fn_5, val_fn_5, network_5, \
            train_fn_6, val_fn_6, network_6, train_fn_7, val_fn_7, network_7, train_fn_8, val_fn_8, network_8
Example #15
    def build_simple_network(self):
        """
        Builds a very, very simple non-memory network to assess the effectiveness of training on the task.
        - Input: (batch_size, max_seqlen, max_sentlen)
        - Wordwise embedding into (batch_size, max_seqlen, max_sentlen, embed_size) 
        - Sum all words in a sentence: (batch_size, max_seqlen, embed_size)
        - Reshape embedding into (batch_size, max_seqlen * embed_size)
        - 2 hidden layers with sigmoid (hidden dim 256 each), followed by a softmax output layer
        """

        batch_size, max_seqlen, max_sentlen, embedding_size, vocab = self.batch_size, self.max_seqlen, self.max_sentlen, self.embedding_size, self.vocab

        self.hidden_size = 256
        c = T.imatrix()
        y = T.imatrix()

        self.c_shared = theano.shared(np.zeros((batch_size, max_seqlen),
                                               dtype=np.int32),
                                      borrow=True)
        self.a_shared = theano.shared(np.zeros((batch_size, self.num_classes),
                                               dtype=np.int32),
                                      borrow=True)

        S_shared = theano.shared(self.S, borrow=True)
        cc = S_shared[c.flatten()].reshape(
            (batch_size, max_seqlen, max_sentlen))

        l_context_in = lasagne.layers.InputLayer(shape=(batch_size, max_seqlen,
                                                        max_sentlen))

        L = self.build_glove_embedding(root_dir +
                                       "/data/glove/glove.6B.100d.txt",
                                       hidden_size=embedding_size)
        print(L)

        embedding = lasagne.layers.EmbeddingLayer(l_context_in,
                                                  len(vocab) + 1,
                                                  embedding_size,
                                                  W=L)

        sum_embeddings = ScaleSumLayer(embedding, axis=2)

        reshape_sum = lasagne.layers.ReshapeLayer(
            sum_embeddings, shape=(batch_size, max_seqlen * embedding_size))
        # Fully connected layers

        dense_1 = lasagne.layers.DenseLayer(reshape_sum,
                                            self.hidden_size,
                                            W=lasagne.init.GlorotNormal(),
                                            nonlinearity=T.nnet.sigmoid)
        dense_2 = lasagne.layers.DenseLayer(dense_1,
                                            self.hidden_size,
                                            W=lasagne.init.GlorotNormal(),
                                            nonlinearity=T.nnet.sigmoid)
        l_pred = lasagne.layers.DenseLayer(
            dense_2,
            self.num_classes,
            nonlinearity=lasagne.nonlinearities.softmax)

        rand_in = np.random.randint(0,
                                    len(vocab) - 1,
                                    size=(batch_size, max_seqlen, max_sentlen))

        fake_probs = lasagne.layers.get_output(l_pred, {
            l_context_in: rand_in
        }).eval()
        print "fake_probs: ", fake_probs

        probas = lasagne.layers.helper.get_output(l_pred, {l_context_in: cc})

        pred = T.argmax(probas, axis=1)

        # l2 regularization
        reg_coeff = 1e-1
        p_metric = l2
        layer_dict = {
            dense_1: reg_coeff,
            dense_2: reg_coeff,
            l_pred: reg_coeff
        }
        reg_cost = reg_coeff * regularize_layer_params_weighted(
            layer_dict, p_metric)

        cost = T.nnet.categorical_crossentropy(probas, y).mean()  #+ reg_cost

        params = lasagne.layers.helper.get_all_params(l_pred, trainable=True)
        grads = T.grad(cost, params)

        scaled_grads = lasagne.updates.total_norm_constraint(
            grads, self.max_norm)
        updates = lasagne.updates.adam(scaled_grads,
                                       params,
                                       learning_rate=self.lr)

        givens = {
            c: self.c_shared,
            y: self.a_shared,
        }

        self.train_model = theano.function([],
                                           cost,
                                           givens=givens,
                                           updates=updates,
                                           on_unused_input='ignore')
        self.compute_pred = theano.function([],
                                            pred,
                                            givens=givens,
                                            on_unused_input='ignore')

        zero_vec_tensor = T.vector()
        self.zero_vec = np.zeros(embedding_size, dtype=theano.config.floatX)
        self.set_zero = theano.function([zero_vec_tensor],
                                        on_unused_input='ignore')

        #self.nonlinearity = nonlinearity
        self.network = l_pred
Example #16
def event_span_classifier(args, input_var, target_var, wordEmbeddings, seqlen, num_feats):

    print("Building model with 1D Convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    kw = 2
    num_filters = seqlen-kw+1
    stride = 1 

    #important context words as channels
 
    #CNN_sentence config
    filter_size=wordDim
    pool_size=seqlen-filter_size+1

    input = InputLayer((None, seqlen, num_feats),input_var=input_var)
    batchsize, _, _ = input.input_var.shape
    emb = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    #emb.params[emb.W].remove('trainable') #(batchsize, seqlen, wordDim)

    #print get_output_shape(emb)
    reshape = ReshapeLayer(emb, (batchsize, seqlen, num_feats*wordDim))
    #print get_output_shape(reshape)

    conv1d = Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()) #nOutputFrame = num_filters,
                                            #nOutputFrameSize = (num_feats*wordDim-filter_size)/stride +1

    #print get_output_shape(conv1d)

    conv1d = DimshuffleLayer(conv1d, (0,2,1))

    #print get_output_shape(conv1d)

    pool_size=num_filters

    maxpool = MaxPool1DLayer(conv1d, pool_size=pool_size) 

    #print get_output_shape(maxpool)
  
    #forward = FlattenLayer(maxpool) 

    #print get_output_shape(forward)
 
    hid = DenseLayer(maxpool, num_units=args.hiddenDim, nonlinearity=sigmoid)

    network = DenseLayer(hid, num_units=2, nonlinearity=softmax)

    prediction = get_output(network)
    
    loss = T.mean(binary_crossentropy(prediction,target_var))
    lambda_val = 0.5 * 1e-4

    layers = {emb:lambda_val, conv1d:lambda_val, hid:lambda_val, network:lambda_val} 
    penalty = regularize_layer_params_weighted(layers, l2)
    loss = loss + penalty


    params = get_all_params(network, trainable=True)

    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        raise "Need set optimizer correctly"
 
    test_prediction = get_output(network, deterministic=True)
    test_loss = T.mean(binary_crossentropy(test_prediction,target_var))

    train_fn = theano.function([input_var, target_var], 
        loss, updates=updates, allow_input_downcast=True)

    test_acc = T.mean(binary_accuracy(test_prediction, target_var))
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc], allow_input_downcast=True)

    return train_fn, val_fn, network
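A minimal training-loop sketch for the compiled functions returned above (the arrays X_train, y_train, X_val, y_val and the constants num_epochs, batch_size are assumptions, not part of the original example):

for epoch in range(num_epochs):
    for start in range(0, len(X_train), batch_size):
        batch_loss = train_fn(X_train[start:start + batch_size],
                              y_train[start:start + batch_size])
    val_loss, val_acc = val_fn(X_val, y_val)
    print("epoch %d  val_loss %.4f  val_acc %.4f" % (epoch, val_loss, val_acc))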
Example #17
def build_cnn(input_var=None):

    # Input layer, as usual:
    network = lasagne.layers.InputLayer(shape=(None, 3, 32, 32),
                                        input_var=input_var)

    #norm0 = BatchNormLayer(network)

    # conv1
    conv1 = Conv2DLayer(network, num_filters=64, filter_size=(3,3),
                        nonlinearity=lasagne.nonlinearities.rectify,
                        pad='same', W=lasagne.init.GlorotUniform(),
                        b=lasagne.init.Constant(0.0),
                        name="conv1")

    conv1a = Conv2DLayer(conv1, num_filters=64, filter_size=(3,3),
                        nonlinearity=lasagne.nonlinearities.rectify,
                        pad='same', W=lasagne.init.GlorotUniform(),
                        b=lasagne.init.Constant(0.0),
                        name="conv1a")


    pool1 = MaxPool2DLayer(conv1a, pool_size=(2, 2), stride=(2, 2), pad=0)

    #norm1 = BatchNormLayer(pool1)
    # pool1


    # conv2
    conv2 = Conv2DLayer(lasagne.layers.dropout(pool1, p = 0.5),
                        num_filters=128, filter_size=(3,3),
                        nonlinearity=lasagne.nonlinearities.rectify,
                        pad='same', W=lasagne.init.GlorotUniform(),
                        b=lasagne.init.Constant(0.1),
                        name='conv2')

    conv2a = Conv2DLayer(conv2,
                        num_filters=128, filter_size=(3,3),
                        nonlinearity=lasagne.nonlinearities.rectify,
                        pad='same', W=lasagne.init.GlorotUniform(),
                        b=lasagne.init.Constant(0.1),
                        name='conv2a')

    pool2 = MaxPool2DLayer(conv2a, pool_size=(2, 2), stride=(2, 2), pad=0)

    # norm2
    #norm2 = BatchNormLayer(pool2)

    # pool2


    conv3 = Conv2DLayer(lasagne.layers.dropout(pool2, p = 0.5),
                        num_filters=256, filter_size=(3,3),
                        nonlinearity=lasagne.nonlinearities.rectify,
                        pad='same', W=lasagne.init.GlorotUniform(),
                        b=lasagne.init.Constant(0.1),
                        name='conv3')

    pool3 = MaxPool2DLayer(conv3, pool_size=(2, 2), stride=(2, 2), pad=0)

    #norm3 = BatchNormLayer(pool3)

    # fc1
    fc1 = DenseLayer(lasagne.layers.dropout(pool3, p = 0.5),
                     num_units=256,
                     nonlinearity=lasagne.nonlinearities.rectify,
                     W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.1),
                     name="fc1")

    # fc3
    softmax_layer = DenseLayer(lasagne.layers.dropout(fc1, p = 0.5),
                               num_units=9,
                               nonlinearity=lasagne.nonlinearities.softmax,
                               W=lasagne.init.GlorotUniform(),
                               b=lasagne.init.Constant(0.0),
                               name="softmax")

    intermediate_layer = pool2
    
    weight_decay_layers = {fc1: 0.0}
    l2_penalty = regularize_layer_params_weighted(weight_decay_layers, l2)

    return softmax_layer, l2_penalty
def multi_task_classifier(args,
                          input_var,
                          target_var,
                          wordEmbeddings,
                          seqlen,
                          num_feats,
                          lambda_val=0.5 * 1e-4):

    print("Building multi task model with 1D Convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    kw = 2
    num_filters = seqlen - kw + 1
    stride = 1
    filter_size = wordDim
    pool_size = num_filters

    input = InputLayer((None, seqlen, num_feats), input_var=input_var)
    batchsize, _, _ = input.input_var.shape

    #span
    emb1 = EmbeddingLayer(input,
                          input_size=vocab_size,
                          output_size=wordDim,
                          W=wordEmbeddings.T)
    reshape1 = ReshapeLayer(emb1, (batchsize, seqlen, num_feats * wordDim))
    conv1d_1 = DimshuffleLayer(
        Conv1DLayer(reshape1,
                    num_filters=num_filters,
                    filter_size=wordDim,
                    stride=1,
                    nonlinearity=tanh,
                    W=GlorotUniform()), (0, 2, 1))
    maxpool_1 = MaxPool1DLayer(conv1d_1, pool_size=pool_size)
    hid_1 = DenseLayer(maxpool_1,
                       num_units=args.hiddenDim,
                       nonlinearity=sigmoid)
    network_1 = DenseLayer(hid_1, num_units=2, nonlinearity=softmax)
    """
    #DocTimeRel
    emb2 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    reshape2 = ReshapeLayer(emb2, (batchsize, seqlen, num_feats*wordDim))
    conv1d_2 = DimshuffleLayer(Conv1DLayer(reshape2, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_2 = MaxPool1DLayer(conv1d_2, pool_size=pool_size)  
    hid_2 = DenseLayer(maxpool_2, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_2 = DenseLayer(hid_2, num_units=5, nonlinearity=softmax)
    """

    #Type
    emb3 = EmbeddingLayer(input,
                          input_size=vocab_size,
                          output_size=wordDim,
                          W=wordEmbeddings.T)
    reshape3 = ReshapeLayer(emb3, (batchsize, seqlen, num_feats * wordDim))
    conv1d_3 = DimshuffleLayer(
        Conv1DLayer(reshape3,
                    num_filters=num_filters,
                    filter_size=wordDim,
                    stride=1,
                    nonlinearity=tanh,
                    W=GlorotUniform()), (0, 2, 1))
    maxpool_3 = MaxPool1DLayer(conv1d_3, pool_size=pool_size)
    hid_3 = DenseLayer(maxpool_3,
                       num_units=args.hiddenDim,
                       nonlinearity=sigmoid)
    network_3 = DenseLayer(hid_3, num_units=4, nonlinearity=softmax)

    #Degree
    emb4 = EmbeddingLayer(input,
                          input_size=vocab_size,
                          output_size=wordDim,
                          W=wordEmbeddings.T)
    reshape4 = ReshapeLayer(emb4, (batchsize, seqlen, num_feats * wordDim))
    conv1d_4 = DimshuffleLayer(
        Conv1DLayer(reshape4,
                    num_filters=num_filters,
                    filter_size=wordDim,
                    stride=1,
                    nonlinearity=tanh,
                    W=GlorotUniform()), (0, 2, 1))
    maxpool_4 = MaxPool1DLayer(conv1d_4, pool_size=pool_size)
    hid_4 = DenseLayer(maxpool_4,
                       num_units=args.hiddenDim,
                       nonlinearity=sigmoid)
    network_4 = DenseLayer(hid_4, num_units=4, nonlinearity=softmax)

    #Polarity
    emb5 = EmbeddingLayer(input,
                          input_size=vocab_size,
                          output_size=wordDim,
                          W=wordEmbeddings.T)
    reshape5 = ReshapeLayer(emb5, (batchsize, seqlen, num_feats * wordDim))
    conv1d_5 = DimshuffleLayer(
        Conv1DLayer(reshape5,
                    num_filters=num_filters,
                    filter_size=wordDim,
                    stride=1,
                    nonlinearity=tanh,
                    W=GlorotUniform()), (0, 2, 1))
    maxpool_5 = MaxPool1DLayer(conv1d_5, pool_size=pool_size)
    hid_5 = DenseLayer(maxpool_5,
                       num_units=args.hiddenDim,
                       nonlinearity=sigmoid)
    network_5 = DenseLayer(hid_5, num_units=3, nonlinearity=softmax)

    #ContextualModality
    emb6 = EmbeddingLayer(input,
                          input_size=vocab_size,
                          output_size=wordDim,
                          W=wordEmbeddings.T)
    reshape6 = ReshapeLayer(emb6, (batchsize, seqlen, num_feats * wordDim))
    conv1d_6 = DimshuffleLayer(
        Conv1DLayer(reshape6,
                    num_filters=num_filters,
                    filter_size=wordDim,
                    stride=1,
                    nonlinearity=tanh,
                    W=GlorotUniform()), (0, 2, 1))
    maxpool_6 = MaxPool1DLayer(conv1d_6, pool_size=pool_size)
    hid_6 = DenseLayer(maxpool_6,
                       num_units=args.hiddenDim,
                       nonlinearity=sigmoid)
    network_6 = DenseLayer(hid_6, num_units=5, nonlinearity=softmax)
    """
    #ContextualAspect
    emb7 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    reshape7 = ReshapeLayer(emb7, (batchsize, seqlen, num_feats*wordDim))
    conv1d_7 = DimshuffleLayer(Conv1DLayer(reshape7, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_7 = MaxPool1DLayer(conv1d_7, pool_size=pool_size)  
    hid_7 = DenseLayer(maxpool_7, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_7 = DenseLayer(hid_7, num_units=4, nonlinearity=softmax)
    """
    """
    #Permanence
    emb8 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    reshape8 = ReshapeLayer(emb8, (batchsize, seqlen, num_feats*wordDim))
    conv1d_8 = DimshuffleLayer(Conv1DLayer(reshape8, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_8 = MaxPool1DLayer(conv1d_8, pool_size=pool_size)  
    hid_8 = DenseLayer(maxpool_8, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_8 = DenseLayer(hid_8, num_units=4, nonlinearity=softmax)
    """

    # Is this important?
    """
    network_1_out, network_2_out, network_3_out, network_4_out, \
    network_5_out, network_6_out, network_7_out, network_8_out = \
    get_output([network_1, network_2, network_3, network_4, network_5, network_6, network_7, network_8])
    """
    network_1_out = get_output(network_1)
    network_3_out = get_output(network_3)
    network_4_out = get_output(network_4)
    network_5_out = get_output(network_5)
    network_6_out = get_output(network_6)

    loss_1 = T.mean(binary_crossentropy(
        network_1_out, target_var)) + regularize_layer_params_weighted(
            {
                emb1: lambda_val,
                conv1d_1: lambda_val,
                hid_1: lambda_val,
                network_1: lambda_val
            }, l2)
    updates_1 = adagrad(loss_1,
                        get_all_params(network_1, trainable=True),
                        learning_rate=args.step)
    train_fn_1 = theano.function([input_var, target_var],
                                 loss_1,
                                 updates=updates_1,
                                 allow_input_downcast=True)
    val_acc_1 = T.mean(
        binary_accuracy(get_output(network_1, deterministic=True), target_var))
    val_fn_1 = theano.function([input_var, target_var],
                               val_acc_1,
                               allow_input_downcast=True)
    """
    loss_2 = T.mean(categorical_crossentropy(network_2_out,target_var)) + regularize_layer_params_weighted({emb2:lambda_val, conv1d_2:lambda_val, 
                hid_2:lambda_val, network_2:lambda_val} , l2)
    updates_2 = adagrad(loss_2, get_all_params(network_2, trainable=True), learning_rate=args.step)
    train_fn_2 = theano.function([input_var, target_var], loss_2, updates=updates_2, allow_input_downcast=True)
    val_acc_2 =  T.mean(categorical_accuracy(get_output(network_2, deterministic=True), target_var))
    val_fn_2 = theano.function([input_var, target_var], val_acc_2, allow_input_downcast=True)
    """

    loss_3 = T.mean(categorical_crossentropy(
        network_3_out, target_var)) + regularize_layer_params_weighted(
            {
                emb3: lambda_val,
                conv1d_3: lambda_val,
                hid_3: lambda_val,
                network_3: lambda_val
            }, l2)
    updates_3 = adagrad(loss_3,
                        get_all_params(network_3, trainable=True),
                        learning_rate=args.step)
    train_fn_3 = theano.function([input_var, target_var],
                                 loss_3,
                                 updates=updates_3,
                                 allow_input_downcast=True)
    val_acc_3 = T.mean(
        categorical_accuracy(get_output(network_3, deterministic=True),
                             target_var))
    val_fn_3 = theano.function([input_var, target_var],
                               val_acc_3,
                               allow_input_downcast=True)

    loss_4 = T.mean(categorical_crossentropy(
        network_4_out, target_var)) + regularize_layer_params_weighted(
            {
                emb4: lambda_val,
                conv1d_4: lambda_val,
                hid_4: lambda_val,
                network_4: lambda_val
            }, l2)
    updates_4 = adagrad(loss_4,
                        get_all_params(network_4, trainable=True),
                        learning_rate=args.step)
    train_fn_4 = theano.function([input_var, target_var],
                                 loss_4,
                                 updates=updates_4,
                                 allow_input_downcast=True)
    val_acc_4 = T.mean(
        categorical_accuracy(get_output(network_4, deterministic=True),
                             target_var))
    val_fn_4 = theano.function([input_var, target_var],
                               val_acc_4,
                               allow_input_downcast=True)

    loss_5 = T.mean(categorical_crossentropy(
        network_5_out, target_var)) + regularize_layer_params_weighted(
            {
                emb5: lambda_val,
                conv1d_5: lambda_val,
                hid_5: lambda_val,
                network_5: lambda_val
            }, l2)
    updates_5 = adagrad(loss_5,
                        get_all_params(network_5, trainable=True),
                        learning_rate=args.step)
    train_fn_5 = theano.function([input_var, target_var],
                                 loss_5,
                                 updates=updates_5,
                                 allow_input_downcast=True)
    val_acc_5 = T.mean(
        categorical_accuracy(get_output(network_5, deterministic=True),
                             target_var))
    val_fn_5 = theano.function([input_var, target_var],
                               val_acc_5,
                               allow_input_downcast=True)

    loss_6 = T.mean(categorical_crossentropy(
        network_6_out, target_var)) + regularize_layer_params_weighted(
            {
                emb6: lambda_val,
                conv1d_6: lambda_val,
                hid_6: lambda_val,
                network_6: lambda_val
            }, l2)
    updates_6 = adagrad(loss_6,
                        get_all_params(network_6, trainable=True),
                        learning_rate=args.step)
    train_fn_6 = theano.function([input_var, target_var],
                                 loss_6,
                                 updates=updates_6,
                                 allow_input_downcast=True)
    val_acc_6 = T.mean(
        categorical_accuracy(get_output(network_6, deterministic=True),
                             target_var))
    val_fn_6 = theano.function([input_var, target_var],
                               val_acc_6,
                               allow_input_downcast=True)
    """
    loss_7 = T.mean(categorical_crossentropy(network_7_out,target_var)) + regularize_layer_params_weighted({emb7:lambda_val, conv1d_7:lambda_val, 
                hid_7:lambda_val, network_7:lambda_val} , l2)
    updates_7 = adagrad(loss_7, get_all_params(network_7, trainable=True), learning_rate=args.step)
    train_fn_7 = theano.function([input_var, target_var], loss_7, updates=updates_7, allow_input_downcast=True)
    val_acc_7 =  T.mean(categorical_accuracy(get_output(network_7, deterministic=True), target_var))
    val_fn_7 = theano.function([input_var, target_var], val_acc_7, allow_input_downcast=True)

    loss_8 = T.mean(categorical_crossentropy(network_8_out,target_var)) + regularize_layer_params_weighted({emb8:lambda_val, conv1d_8:lambda_val, 
                hid_8:lambda_val, network_8:lambda_val} , l2)
    updates_8 = adagrad(loss_8, get_all_params(network_8, trainable=True), learning_rate=args.step)
    train_fn_8 = theano.function([input_var, target_var], loss_8, updates=updates_8, allow_input_downcast=True)
    val_acc_8 =  T.mean(categorical_accuracy(get_output(network_8, deterministic=True), target_var))
    val_fn_8 = theano.function([input_var, target_var], val_acc_8, allow_input_downcast=True)
    """
    """
    return train_fn_1, val_fn_1, network_1, train_fn_2, val_fn_2, network_2, train_fn_3, val_fn_3, \
            network_3, train_fn_4, val_fn_4, network_4, train_fn_5, val_fn_5, network_5, \
            train_fn_6, val_fn_6, network_6, train_fn_7, val_fn_7, network_7, train_fn_8, val_fn_8, network_8
    """
    return train_fn_1, val_fn_1, network_1, train_fn_3, val_fn_3, \
            network_3, train_fn_4, val_fn_4, network_4, train_fn_5, val_fn_5, network_5, \
            train_fn_6, val_fn_6, network_6
Example #19
def main(exp_name, embed_data, train_data, train_data_stats, val_data, val_data_stats,
         test_data, test_data_stats, log_path, batch_size, num_epochs,
         unroll_steps, learn_rate, num_dense, dense_dim, penalty, reg_coeff):
    """
    Main run function for training model.
    :param exp_name:
    :param embed_data:
    :param train_data:
    :param train_data_stats:
    :param val_data:
    :param val_data_stats:
    :param test_data:
    :param test_data_stats:
    :param log_path:
    :param batch_size:
    :param num_epochs:
    :param unroll_steps:
    :param learn_rate:
    :param num_dense: Number of dense fully connected layers to add after concatenation layer
    :param dense_dim: Dimension of dense FC layers -- note this only applies if num_dense > 1
    :param penalty: Penalty to use for regularization
    :param reg_coeff: Regularization coefficient to use for each layer of the network;
                      may want to support different coefficients for different layers
    :return:
    """
    # Set random seed for deterministic results
    np.random.seed(0)
    num_ex_to_train = 30

    # Load embedding table
    table = EmbeddingTable(embed_data)
    vocab_size = table.sizeVocab
    dim_embeddings = table.dimEmbeddings
    embeddings_mat = table.embeddings


    train_prem, train_hyp = generate_data(train_data, train_data_stats, "left", "right", table, seq_len=unroll_steps)
    val_prem, val_hyp = generate_data(val_data, val_data_stats, "left", "right", table, seq_len=unroll_steps)
    train_labels = convertLabelsToMat(train_data)
    val_labels = convertLabelsToMat(val_data)

    # To test for overfitting capabilities of model
    if num_ex_to_train > 0:
        val_prem = val_prem[0:num_ex_to_train]
        val_hyp = val_hyp[0:num_ex_to_train]
        val_labels = val_labels[0:num_ex_to_train]

    # Theano expressions for premise/hypothesis inputs to network
    x_p = T.imatrix()
    x_h = T.imatrix()
    target_values = T.fmatrix(name="target_output")


    # Embedding layer for premise
    l_in_prem = InputLayer((batch_size, unroll_steps))
    l_embed_prem = EmbeddingLayer(l_in_prem, input_size=vocab_size,
                        output_size=dim_embeddings, W=embeddings_mat)

    # Embedding layer for hypothesis
    l_in_hyp = InputLayer((batch_size, unroll_steps))
    l_embed_hyp = EmbeddingLayer(l_in_hyp, input_size=vocab_size,
                        output_size=dim_embeddings, W=embeddings_mat)


    # Ensure embedding matrix parameters are not trainable
    l_embed_hyp.params[l_embed_hyp.W].remove('trainable')
    l_embed_prem.params[l_embed_prem.W].remove('trainable')

    l_embed_hyp_sum = SumEmbeddingLayer(l_embed_hyp)
    l_embed_prem_sum = SumEmbeddingLayer(l_embed_prem)

    # Concatenate sentence embeddings for premise and hypothesis
    l_concat = ConcatLayer([l_embed_hyp_sum, l_embed_prem_sum])

    l_in = l_concat
    l_output = l_concat
    # Add 'num_dense' dense layers with tanh
    # top layer is softmax
    if num_dense > 1:
        for n in range(num_dense):
            if n == num_dense-1:
                l_output = DenseLayer(l_in, num_units=NUM_DENSE_UNITS, nonlinearity=lasagne.nonlinearities.softmax)
            else:
                l_in = DenseLayer(l_in, num_units=dense_dim, nonlinearity=lasagne.nonlinearities.tanh)
    else:
        l_output = DenseLayer(l_in, num_units=NUM_DENSE_UNITS, nonlinearity=lasagne.nonlinearities.softmax)

    network_output = get_output(l_output, {l_in_prem: x_p, l_in_hyp: x_h}) # Will have shape (batch_size, 3)
    f_dense_output = theano.function([x_p, x_h], network_output, on_unused_input='warn')

    # Compute cost
    if penalty == "l2":
        p_metric = l2
    elif penalty == "l1":
        p_metric = l1

    layers = lasagne.layers.get_all_layers(l_output)
    layer_dict = {l: reg_coeff for l in layers}
    reg_cost = reg_coeff * regularize_layer_params_weighted(layer_dict, p_metric)
    cost = T.mean(T.nnet.categorical_crossentropy(network_output, target_values).mean()) + reg_cost
    compute_cost = theano.function([x_p, x_h, target_values], cost)

    # Compute accuracy
    accuracy = T.mean(T.eq(T.argmax(network_output, axis=-1), T.argmax(target_values, axis=-1)),
                      dtype=theano.config.floatX)
    compute_accuracy = theano.function([x_p, x_h, target_values], accuracy)

    label_output = T.argmax(network_output, axis=-1)
    predict = theano.function([x_p, x_h], label_output)

    # Define update/train functions
    all_params = lasagne.layers.get_all_params(l_output, trainable=True)
    updates = lasagne.updates.rmsprop(cost, all_params, learn_rate)
    train = theano.function([x_p, x_h, target_values], cost, updates=updates)

    # TODO: Augment embedding layer to allow for masking inputs

    stats = Stats(exp_name)
    acc_num = 10

    #minibatches = getMinibatchesIdx(val_prem.shape[0], batch_size)
    minibatches = getMinibatchesIdx(train_prem.shape[0], batch_size)
    print("Training ...")
    try:
        total_num_ex = 0
        for epoch in xrange(num_epochs):
            for _, minibatch in minibatches:
                total_num_ex += len(minibatch)
                stats.log("Processed {0} total examples in epoch {1}".format(str(total_num_ex),
                                                                          str(epoch)))

                #prem_batch = val_prem[minibatch]
                #hyp_batch = val_hyp[minibatch]
                #labels_batch = val_labels[minibatch]

                prem_batch = train_prem[minibatch]
                hyp_batch = train_hyp[minibatch]
                labels_batch = train_labels[minibatch]

                train(prem_batch, hyp_batch, labels_batch)
                cost_val = compute_cost(prem_batch, hyp_batch, labels_batch)

                stats.recordCost(total_num_ex, cost_val)
                # Periodically compute and log train/dev accuracy
                if total_num_ex%(acc_num*batch_size) == 0:
                    train_acc = compute_accuracy(train_prem, train_hyp, train_labels)
                    dev_acc = compute_accuracy(val_prem, val_hyp, val_labels)
                    stats.recordAcc(total_num_ex, train_acc, dataset="train")
                    stats.recordAcc(total_num_ex, dev_acc, dataset="dev")

    except KeyboardInterrupt:
        pass
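One detail worth noting in the cost above: reg_coeff enters twice, once as the per-layer weight in layer_dict and once as the outer multiplier, so the effective coefficient is reg_coeff squared. If a single weighting is intended, either form below is sufficient (a sketch reusing the names from the function above; regularize_layer_params also lives in lasagne.regularization):

# Option A: put the coefficient in the dict only.
reg_cost = regularize_layer_params_weighted({l: reg_coeff for l in layers}, p_metric)

# Option B: apply an unweighted penalty over all layers and scale it once.
reg_cost = reg_coeff * regularize_layer_params(layers, p_metric)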
Ejemplo n.º 20
0
def build_cnn(input_var=None, batch_size = None):
    """Build the CIFAR-10 model.

    Args:
    images: Images returned from distorted_inputs() or inputs().

    Returns:
    Logits.
    """
    # We instantiate all variables using tf.get_variable() instead of
    # tf.Variable() in order to share variables across multiple GPU training runs.
    # If we only ran this model on a single GPU, we could simplify this function
    # by replacing all instances of tf.get_variable() with tf.Variable().
    #

    input_layer = InputLayer((batch_size, 3, ORIGINAL_IMAGE_SIZE, ORIGINAL_IMAGE_SIZE), input_var=input_var)

    repeatInput = Repeat(input_layer, 10)

    reshapeInput = lasagne.layers.ReshapeLayer(repeatInput, (batch_size * 10, 3, ORIGINAL_IMAGE_SIZE, ORIGINAL_IMAGE_SIZE))

    original_transformed = BrightnessAdjustLayer(reshapeInput, batch_size * 10)

    # norm0 = BatchNormLayer(original_transformed)

    # conv1
    conv1 = Conv2DLayer(original_transformed, num_filters=64, filter_size=(3,3),
                        nonlinearity=lasagne.nonlinearities.rectify,
                        pad='same', W=lasagne.init.GlorotUniform(),
                        b=lasagne.init.Constant(0.0),
                        name="conv1")
    
    conv1a = Conv2DLayer(conv1, num_filters=64, filter_size=(3,3),
                        nonlinearity=lasagne.nonlinearities.rectify,
                        pad='same', W=lasagne.init.GlorotUniform(),
                        b=lasagne.init.Constant(0.0),
                        name="conv1a")
    

    pool1 = MaxPool2DLayer(conv1a, pool_size=(2, 2), stride=(2, 2), pad=0)
    
    # norm1 = LocalResponseNormalization2DLayer(pool1, alpha=0.001 / 9.0,
    #                                          beta=0.75, k=1.0, n=9)
    norm1 = BatchNormLayer(pool1)

    
    # conv2
    conv2 = Conv2DLayer(lasagne.layers.dropout(norm1, p = 0.5), 
                        num_filters=128, filter_size=(3,3),
                        nonlinearity=lasagne.nonlinearities.rectify,
                        pad='same', W=lasagne.init.GlorotUniform(),
                        b=lasagne.init.Constant(0.1),
                        name='conv2')

    conv2a = Conv2DLayer(conv2, 
                        num_filters=128, filter_size=(3,3),
                        nonlinearity=lasagne.nonlinearities.rectify,
                        pad='same', W=lasagne.init.GlorotUniform(),
                        b=lasagne.init.Constant(0.1),
                        name='conv2a')
    
    pool2 = MaxPool2DLayer(conv2a, pool_size=(2, 2), stride=(2, 2), pad=0)
    
    # norm2 = LocalResponseNormalization2DLayer(pool2, alpha=0.001 / 9.0,
    #                                           beta=0.75, k=1.0, n=9)

    norm2 = BatchNormLayer(pool2)

    # pool2
    
    conv3 = Conv2DLayer(lasagne.layers.dropout(norm2, p = 0.5), 
                        num_filters=256, filter_size=(3,3),
                        nonlinearity=lasagne.nonlinearities.rectify,
                        pad='same', W=lasagne.init.GlorotUniform(),
                        b=lasagne.init.Constant(0.1),
                        name='conv3')
    
    
    pool3 = MaxPool2DLayer(conv3, pool_size=(2, 2), stride=(2, 2), pad=0)
    
    # norm3 = LocalResponseNormalization2DLayer(pool3, alpha=0.001 / 9.0,
    #                                           beta=0.75, k=1.0, n=9)
    norm3 = BatchNormLayer(pool3)
    
    # fc1
    fc1 = DenseLayer(lasagne.layers.dropout(norm3, p = 0.5), 
                     num_units=256,
                     nonlinearity=lasagne.nonlinearities.rectify,
                     W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.1),
                     name="fc1")

    # fc3
    output_layer = DenseLayer(lasagne.layers.dropout(fc1, p = 0.5),
                               num_units=10,
                               #nonlinearity=lasagne.nonlinearities.softmax,
                               nonlinearity=lasagne.nonlinearities.identity,
                               W=lasagne.init.GlorotUniform(),
                               b=lasagne.init.Constant(0.0),
                               name="output")
    
    output_transformed = lasagne.layers.ReshapeLayer(original_transformed, (batch_size, 10, 3, ORIGINAL_IMAGE_SIZE, ORIGINAL_IMAGE_SIZE))

    output_selected = SelectLayer(output_layer, 10)

    # Weight Decay
    weight_decay_layers = {original_transformed: 0.01}
    l2_penalty = regularize_layer_params_weighted(weight_decay_layers, l2)

    return output_layer, output_selected, l2_penalty, output_transformed
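A hedged sketch of inspecting what this builder returns (names outside the return values are illustrative; it assumes the custom Repeat, BrightnessAdjustLayer and SelectLayer classes used above are importable): output_transformed exposes the ten brightness-adjusted copies of each input image, and l2_penalty is a scalar expression the caller adds to its training loss.

import theano
import theano.tensor as T
import lasagne

input_var = T.tensor4('images')
output_layer, output_selected, l2_penalty, output_transformed = build_cnn(input_var, batch_size=16)

# Compile a forward pass that returns the ten transformed copies of each image,
# shaped (batch_size, 10, 3, ORIGINAL_IMAGE_SIZE, ORIGINAL_IMAGE_SIZE).
transformed = lasagne.layers.get_output(output_transformed, deterministic=True)
show_variants = theano.function([input_var], transformed)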
def build_network_lstm2dconv(
    args, input1_var, input1_mask_var, input2_var, input2_mask_var, target_var, wordEmbeddings, maxlen=36
):

    print ("Building model lstm + 2D Convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]
    GRAD_CLIP = wordDim

    num_filters = 8
    filter_size = (2, 9)
    stride = 1
    pool_size = (1, 2)

    input_1 = InputLayer((None, maxlen), input_var=input1_var)
    batchsize, seqlen = input_1.input_var.shape
    input_1_mask = InputLayer((None, maxlen), input_var=input1_mask_var)
    emb_1 = EmbeddingLayer(input_1, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    emb_1.params[emb_1.W].remove("trainable")

    lstm_1 = LSTMLayer(
        emb_1, num_units=args.lstmDim, mask_input=input_1_mask, grad_clipping=GRAD_CLIP, nonlinearity=tanh
    )

    lstm_1_back = LSTMLayer(
        emb_1,
        num_units=args.lstmDim,
        mask_input=input_1_mask,
        grad_clipping=GRAD_CLIP,
        nonlinearity=tanh,
        backwards=True,
    )

    slice_1 = SliceLayer(lstm_1, indices=-1, axis=1)  # out_shape (None, args.lstmDim)
    slice_1_back = SliceLayer(lstm_1_back, indices=0, axis=1)  # out_shape (None, args.lstmDim)

    concat_1 = ConcatLayer([slice_1, slice_1_back], axis=1)

    reshape_1 = ReshapeLayer(concat_1, (batchsize, 1, 2, args.lstmDim))
    conv2d_1 = Conv2DLayer(
        reshape_1,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,  # (None, 3, 1, 48)
        nonlinearity=rectify,
        W=GlorotUniform(),
    )
    maxpool_1 = MaxPool2DLayer(conv2d_1, pool_size=pool_size)  # (None, 3, 1, 24)
    forward_1 = FlattenLayer(maxpool_1)  # (None, 72)

    input_2 = InputLayer((None, maxlen), input_var=input2_var)
    input_2_mask = InputLayer((None, maxlen), input_var=input2_mask_var)
    emb_2 = EmbeddingLayer(input_2, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    emb_2.params[emb_2.W].remove("trainable")

    lstm_2 = LSTMLayer(
        emb_2, num_units=args.lstmDim, mask_input=input_2_mask, grad_clipping=GRAD_CLIP, nonlinearity=tanh
    )

    lstm_2_back = LSTMLayer(
        emb_2,
        num_units=args.lstmDim,
        mask_input=input_2_mask,
        grad_clipping=GRAD_CLIP,
        nonlinearity=tanh,
        backwards=True,
    )

    slice_2 = SliceLayer(lstm_2, indices=-1, axis=1)
    slice_2_b = SliceLayer(lstm_2_back, indices=0, axis=1)
    concat_2 = ConcatLayer([slice_2, slice_2_b])

    reshape_2 = ReshapeLayer(concat_2, (batchsize, 1, 2, args.lstmDim))
    conv2d_2 = Conv2DLayer(
        reshape_2,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,
        nonlinearity=rectify,
        W=GlorotUniform(),
    )
    maxpool_2 = MaxPool2DLayer(conv2d_2, pool_size=pool_size)
    forward_2 = FlattenLayer(maxpool_2)  # (None, 72)

    # elementwisemerge need fix the sequence length
    mul = ElemwiseMergeLayer([forward_1, forward_2], merge_function=T.mul)
    sub = AbsSubLayer([forward_1, forward_2], merge_function=T.sub)

    concat = ConcatLayer([mul, sub])
    hid = DenseLayer(concat, num_units=args.hiddenDim, nonlinearity=sigmoid)

    if args.task == "sts":
        network = DenseLayer(hid, num_units=5, nonlinearity=softmax)

    elif args.task == "ent":
        network = DenseLayer(hid, num_units=3, nonlinearity=softmax)

    lambda_val = 0.5 * 1e-4
    layers = {lstm_1: lambda_val, conv2d_1: lambda_val, hid: lambda_val, network: lambda_val}
    penalty = regularize_layer_params_weighted(layers, l2)

    return network, penalty
def build_network_2dconv(
    args, input1_var, input1_mask_var, input2_var, input2_mask_var, target_var, wordEmbeddings, maxlen=36
):

    print ("Building model with 2D Convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    num_filters = 100

    stride = 1

    # CNN_sentence config
    filter_size = (3, wordDim)
    pool_size = (maxlen - 3 + 1, 1)

    # two conv pool layer
    # filter_size=(10, 100)
    # pool_size=(4,4)

    input_1 = InputLayer((None, maxlen), input_var=input1_var)
    batchsize, seqlen = input_1.input_var.shape
    # input_1_mask = InputLayer((None, maxlen),input_var=input1_mask_var)
    emb_1 = EmbeddingLayer(input_1, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    emb_1.params[emb_1.W].remove("trainable")  # (batchsize, maxlen, wordDim)

    reshape_1 = ReshapeLayer(emb_1, (batchsize, 1, maxlen, wordDim))

    conv2d_1 = Conv2DLayer(
        reshape_1,
        num_filters=num_filters,
        filter_size=(filter_size),
        stride=stride,
        nonlinearity=rectify,
        W=GlorotUniform(),
    )  # (None, 100, 34, 1)
    maxpool_1 = MaxPool2DLayer(conv2d_1, pool_size=pool_size)  # (None, 100, 1, 1)

    """
    filter_size_2=(4, 10)
    pool_size_2=(2,2)
    conv2d_1 = Conv2DLayer(maxpool_1, num_filters=num_filters, filter_size=filter_size_2, stride=stride, 
        nonlinearity=rectify,W=GlorotUniform()) #(None, 100, 34, 1)
    maxpool_1 = MaxPool2DLayer(conv2d_1, pool_size=pool_size_2) #(None, 100, 1, 1) (None, 100, 1, 20)
    """

    forward_1 = FlattenLayer(maxpool_1)  # (None, 100) #(None, 50400)

    input_2 = InputLayer((None, maxlen), input_var=input2_var)
    # input_2_mask = InputLayer((None, maxlen),input_var=input2_mask_var)
    emb_2 = EmbeddingLayer(input_2, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    emb_2.params[emb_2.W].remove("trainable")

    reshape_2 = ReshapeLayer(emb_2, (batchsize, 1, maxlen, wordDim))
    conv2d_2 = Conv2DLayer(
        reshape_2,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,
        nonlinearity=rectify,
        W=GlorotUniform(),
    )  # (None, 100, 34, 1)
    maxpool_2 = MaxPool2DLayer(conv2d_2, pool_size=pool_size)  # (None, 100, 1, 1)

    """
    conv2d_2 = Conv2DLayer(maxpool_2, num_filters=num_filters, filter_size=filter_size_2, stride=stride, 
        nonlinearity=rectify,W=GlorotUniform()) #(None, 100, 34, 1)
    maxpool_2 = MaxPool2DLayer(conv2d_2, pool_size=pool_size_2) #(None, 100, 1, 1)
    """

    forward_2 = FlattenLayer(maxpool_2)  # (None, 100)

    # elementwisemerge need fix the sequence length
    mul = ElemwiseMergeLayer([forward_1, forward_2], merge_function=T.mul)
    sub = AbsSubLayer([forward_1, forward_2], merge_function=T.sub)
    concat = ConcatLayer([mul, sub])

    # NOTE: the next line overrides the mul/sub features, so only the plain
    # concatenation of the two sentence vectors feeds the classifier.
    concat = ConcatLayer([forward_1, forward_2])

    hid = DenseLayer(concat, num_units=args.hiddenDim, nonlinearity=sigmoid)

    if args.task == "sts":
        network = DenseLayer(hid, num_units=5, nonlinearity=softmax)

    elif args.task == "ent":
        network = DenseLayer(hid, num_units=3, nonlinearity=softmax)

    # prediction = get_output(network, {input_1:input1_var, input_2:input2_var})
    prediction = get_output(network)

    loss = T.mean(categorical_crossentropy(prediction, target_var))
    lambda_val = 0.5 * 1e-4

    layers = {conv2d_1: lambda_val, hid: lambda_val, network: lambda_val}
    penalty = regularize_layer_params_weighted(layers, l2)
    loss = loss + penalty

    params = get_all_params(network, trainable=True)

    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        raise "Need set optimizer correctly"

    # test_prediction = get_output(network, {input_1:input1_var, input_2:input2_var}, deterministic=True)
    test_prediction = get_output(network, deterministic=True)
    test_loss = T.mean(categorical_crossentropy(test_prediction, target_var))

    """
    train_fn = theano.function([input1_var, input1_mask_var, input2_var, input2_mask_var, target_var], 
        loss, updates=updates, allow_input_downcast=True)
    """
    train_fn = theano.function([input1_var, input2_var, target_var], loss, updates=updates, allow_input_downcast=True)

    if args.task == "sts":
        """
        val_fn = theano.function([input1_var, input1_mask_var, input2_var, input2_mask_var, target_var], 
            [test_loss, test_prediction], allow_input_downcast=True)
        """
        val_fn = theano.function(
            [input1_var, input2_var, target_var], [test_loss, test_prediction], allow_input_downcast=True
        )

    elif args.task == "ent":
        # test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var), dtype=theano.config.floatX)
        test_acc = T.mean(categorical_accuracy(test_prediction, target_var))

        """
        val_fn = theano.function([input1_var, input1_mask_var, input2_var, input2_mask_var, target_var], 
            [test_loss, test_acc], allow_input_downcast=True)
        """
        val_fn = theano.function([input1_var, input2_var, target_var], [test_loss, test_acc], allow_input_downcast=True)

    return train_fn, val_fn
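A hedged sketch of driving the returned train_fn/val_fn with numpy minibatches. The argument object, the random embedding matrix and the toy word-id arrays are illustrative stand-ins; the real scripts build them from their corpus and command line.

import numpy as np
import theano.tensor as T
from argparse import Namespace

args = Namespace(task="ent", hiddenDim=50, optimizer="adagrad", step=0.01)
wordEmbeddings = np.random.randn(300, 5000).astype('float32')  # (wordDim, vocab_size)
input1_var, input2_var = T.imatrix('s1'), T.imatrix('s2')
mask1_var, mask2_var = T.matrix('m1'), T.matrix('m2')          # unused by this builder
target_var = T.ivector('y')

train_fn, val_fn = build_network_2dconv(args, input1_var, mask1_var,
                                        input2_var, mask2_var, target_var,
                                        wordEmbeddings, maxlen=36)

# Toy data: 256 sentence pairs of word ids padded to maxlen, with 3-way labels.
s1 = np.random.randint(0, 5000, size=(256, 36)).astype('int32')
s2 = np.random.randint(0, 5000, size=(256, 36)).astype('int32')
y = np.random.randint(0, 3, size=256).astype('int32')

for epoch in range(5):
    for start in range(0, len(s1), 64):
        batch = slice(start, start + 64)
        train_fn(s1[batch], s2[batch], y[batch])
    print(val_fn(s1, s2, y))  # [test_loss, test_acc] for the "ent" task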
def event_span_classifier(args, input_var, input_mask_var, target_var, wordEmbeddings, seqlen):

    print("Building model with LSTM")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    GRAD_CLIP = wordDim

    args.lstmDim = 150

    input = InputLayer((None, seqlen),input_var=input_var)
    batchsize, seqlen = input.input_var.shape
    input_mask = InputLayer((None, seqlen),input_var=input_mask_var)
    
    emb = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    #emb.params[emb_1.W].remove('trainable')

    lstm = LSTMLayer(emb, num_units=args.lstmDim, mask_input=input_mask, grad_clipping=GRAD_CLIP,
        nonlinearity=tanh)

    lstm_back = LSTMLayer(
        emb, num_units=args.lstmDim, mask_input=input_mask, grad_clipping=GRAD_CLIP,
        nonlinearity=tanh, backwards=True)

    slice_forward = SliceLayer(lstm, indices=-1, axis=1) # out_shape (None, args.lstmDim)
    slice_backward = SliceLayer(lstm_back, indices=0, axis=1) # out_shape (None, args.lstmDim)

    concat = ConcatLayer([slice_forward, slice_backward])

    hid = DenseLayer(concat, num_units=args.hiddenDim, nonlinearity=sigmoid)

    network = DenseLayer(hid, num_units=2, nonlinearity=softmax)

    prediction = get_output(network)
    
    loss = T.mean(binary_crossentropy(prediction,target_var))
    lambda_val = 0.5 * 1e-4

    layers = {emb:lambda_val, lstm:lambda_val, hid:lambda_val, network:lambda_val} 
    penalty = regularize_layer_params_weighted(layers, l2)
    loss = loss + penalty


    params = get_all_params(network, trainable=True)

    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        raise "Need set optimizer correctly"
 
    test_prediction = get_output(network, deterministic=True)
    test_loss = T.mean(binary_crossentropy(test_prediction,target_var))


    train_fn = theano.function([input_var, input_mask_var,target_var], 
        loss, updates=updates, allow_input_downcast=True)

    test_acc = T.mean(binary_accuracy(test_prediction, target_var))

    val_fn = theano.function([input_var, input_mask_var, target_var], [test_loss, test_acc], allow_input_downcast=True)

    return train_fn, val_fn, network
def event_span_classifier(args, input_var, target_var, wordEmbeddings, seqlen, num_feats):

    print("Building model with 1D Convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    kw = 2
    num_filters = seqlen-kw+1
    stride = 1 

    #important context words as channels
 
    #CNN_sentence config
    filter_size=wordDim
    pool_size=seqlen-filter_size+1

    input = InputLayer((None, seqlen, num_feats),input_var=input_var)
    batchsize, _, _ = input.input_var.shape
    emb = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    #emb.params[emb.W].remove('trainable') #(batchsize, seqlen, wordDim)

    #print get_output_shape(emb)
    reshape = ReshapeLayer(emb, (batchsize, seqlen, num_feats*wordDim))
    #print get_output_shape(reshape)

    conv1d = Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()) #nOutputFrame = num_filters, 
                                            #nOutputFrameSize = (num_feats*wordDim-filter_size)/stride +1

    #print get_output_shape(conv1d)

    conv1d = DimshuffleLayer(conv1d, (0,2,1))

    #print get_output_shape(conv1d)

    pool_size=num_filters

    maxpool = MaxPool1DLayer(conv1d, pool_size=pool_size) 

    #print get_output_shape(maxpool)
  
    #forward = FlattenLayer(maxpool) 

    #print get_output_shape(forward)
 
    hid = DenseLayer(maxpool, num_units=args.hiddenDim, nonlinearity=sigmoid)

    network = DenseLayer(hid, num_units=2, nonlinearity=softmax)

    prediction = get_output(network)
    
    loss = T.mean(binary_crossentropy(prediction,target_var))
    lambda_val = 0.5 * 1e-4

    layers = {emb:lambda_val, conv1d:lambda_val, hid:lambda_val, network:lambda_val} 
    penalty = regularize_layer_params_weighted(layers, l2)
    loss = loss + penalty


    params = get_all_params(network, trainable=True)

    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        raise "Need set optimizer correctly"
 
    test_prediction = get_output(network, deterministic=True)
    test_loss = T.mean(binary_crossentropy(test_prediction,target_var))

    train_fn = theano.function([input_var, target_var], 
        loss, updates=updates, allow_input_downcast=True)

    test_acc = T.mean(binary_accuracy(test_prediction, target_var))
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc], allow_input_downcast=True)

    return train_fn, val_fn, network
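Since this builder also returns the output layer, a caller can checkpoint the trained weights with Lasagne's parameter helpers; a minimal sketch (the file name is illustrative, and network is the third value returned above):

import numpy as np
import lasagne

# Snapshot all parameter values of the returned network after training...
param_values = lasagne.layers.get_all_param_values(network)
np.savez('event_span_model.npz', *param_values)

# ...and later restore them into a freshly built graph with the same architecture.
with np.load('event_span_model.npz') as f:
    saved = [f['arr_%d' % i] for i in range(len(f.files))]
lasagne.layers.set_all_param_values(network, saved)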
def build_network_2dconv(args, input_var, target_var, wordEmbeddings, maxlen=60):

    print("Building model with 2D Convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    num_filters = 100
    stride = 1

    # CNN_sentence config
    filter_size = (3, wordDim)
    pool_size = (maxlen - 3 + 1, 1)

    input = InputLayer((None, maxlen), input_var=input_var)
    batchsize, seqlen = input.input_var.shape
    emb = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    emb.params[emb.W].remove("trainable")  # (batchsize, maxlen, wordDim)

    reshape = ReshapeLayer(emb, (batchsize, 1, maxlen, wordDim))

    conv2d = Conv2DLayer(
        reshape,
        num_filters=num_filters,
        filter_size=(filter_size),
        stride=stride,
        nonlinearity=rectify,
        W=GlorotUniform(),
    )  # (None, 100, 34, 1)
    maxpool = MaxPool2DLayer(conv2d, pool_size=pool_size)  # (None, 100, 1, 1)

    forward = FlattenLayer(maxpool)  # (None, 100) #(None, 50400)

    hid = DenseLayer(forward, num_units=args.hiddenDim, nonlinearity=sigmoid)

    network = DenseLayer(hid, num_units=2, nonlinearity=softmax)

    prediction = get_output(network)

    loss = T.mean(binary_crossentropy(prediction, target_var))
    lambda_val = 0.5 * 1e-4

    layers = {conv2d: lambda_val, hid: lambda_val, network: lambda_val}
    penalty = regularize_layer_params_weighted(layers, l2)
    loss = loss + penalty

    params = get_all_params(network, trainable=True)

    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        raise "Need set optimizer correctly"

    test_prediction = get_output(network, deterministic=True)
    test_loss = T.mean(binary_crossentropy(test_prediction, target_var))

    train_fn = theano.function([input_var, target_var], loss, updates=updates, allow_input_downcast=True)

    test_acc = T.mean(binary_accuracy(test_prediction, target_var))
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc], allow_input_downcast=True)

    return train_fn, val_fn
Ejemplo n.º 26
0
def main(exp_name, embed_data, train_data, train_data_stats, val_data,
         val_data_stats, test_data, test_data_stats, log_path, batch_size,
         num_epochs, unroll_steps, learn_rate, num_dense, dense_dim, penalty,
         reg_coeff):
    """
    Main run function for training model.
    :param exp_name:
    :param embed_data:
    :param train_data:
    :param train_data_stats:
    :param val_data:
    :param val_data_stats:
    :param test_data:
    :param test_data_stats:
    :param log_path:
    :param batch_size:
    :param num_epochs:
    :param unroll_steps:
    :param learn_rate:
    :param num_dense: Number of dense fully connected layers to add after concatenation layer
    :param dense_dim: Dimension of dense FC layers -- note this only applies if num_dense > 1
    :param penalty: Penalty to use for regularization
    :param reg_coeff: Regularization coefficient to use for each layer of the network; may
                       want to support a different coefficient for different layers
    :return:
    """
    # Set random seed for deterministic results
    np.random.seed(0)
    num_ex_to_train = 30

    # Load embedding table
    table = EmbeddingTable(embed_data)
    vocab_size = table.sizeVocab
    dim_embeddings = table.dimEmbeddings
    embeddings_mat = table.embeddings

    train_prem, train_hyp = generate_data(train_data,
                                          train_data_stats,
                                          "left",
                                          "right",
                                          table,
                                          seq_len=unroll_steps)
    val_prem, val_hyp = generate_data(val_data,
                                      val_data_stats,
                                      "left",
                                      "right",
                                      table,
                                      seq_len=unroll_steps)
    train_labels = convertLabelsToMat(train_data)
    val_labels = convertLabelsToMat(val_data)

    # To test for overfitting capabilities of model
    if num_ex_to_train > 0:
        val_prem = val_prem[0:num_ex_to_train]
        val_hyp = val_hyp[0:num_ex_to_train]
        val_labels = val_labels[0:num_ex_to_train]

    # Theano expressions for premise/hypothesis inputs to network
    x_p = T.imatrix()
    x_h = T.imatrix()
    target_values = T.fmatrix(name="target_output")

    # Embedding layer for premise
    l_in_prem = InputLayer((batch_size, unroll_steps))
    l_embed_prem = EmbeddingLayer(l_in_prem,
                                  input_size=vocab_size,
                                  output_size=dim_embeddings,
                                  W=embeddings_mat)

    # Embedding layer for hypothesis
    l_in_hyp = InputLayer((batch_size, unroll_steps))
    l_embed_hyp = EmbeddingLayer(l_in_hyp,
                                 input_size=vocab_size,
                                 output_size=dim_embeddings,
                                 W=embeddings_mat)

    # Ensure embedding matrix parameters are not trainable
    l_embed_hyp.params[l_embed_hyp.W].remove('trainable')
    l_embed_prem.params[l_embed_prem.W].remove('trainable')

    l_embed_hyp_sum = SumEmbeddingLayer(l_embed_hyp)
    l_embed_prem_sum = SumEmbeddingLayer(l_embed_prem)

    # Concatenate sentence embeddings for premise and hypothesis
    l_concat = ConcatLayer([l_embed_hyp_sum, l_embed_prem_sum])

    l_in = l_concat
    l_output = l_concat
    # Add 'num_dense' dense layers with tanh
    # top layer is softmax
    if num_dense > 1:
        for n in range(num_dense):
            if n == num_dense - 1:
                l_output = DenseLayer(
                    l_in,
                    num_units=NUM_DENSE_UNITS,
                    nonlinearity=lasagne.nonlinearities.softmax)
            else:
                l_in = DenseLayer(l_in,
                                  num_units=dense_dim,
                                  nonlinearity=lasagne.nonlinearities.tanh)
    else:
        l_output = DenseLayer(l_in,
                              num_units=NUM_DENSE_UNITS,
                              nonlinearity=lasagne.nonlinearities.softmax)

    network_output = get_output(l_output, {
        l_in_prem: x_p,
        l_in_hyp: x_h
    })  # Will have shape (batch_size, 3)
    f_dense_output = theano.function([x_p, x_h],
                                     network_output,
                                     on_unused_input='warn')

    # Compute cost
    if penalty == "l2":
        p_metric = l2
    elif penalty == "l1":
        p_metric = l1

    layers = lasagne.layers.get_all_layers(l_output)
    layer_dict = {l: reg_coeff for l in layers}
    reg_cost = reg_coeff * regularize_layer_params_weighted(
        layer_dict, p_metric)
    cost = T.mean(
        T.nnet.categorical_crossentropy(network_output,
                                        target_values).mean()) + reg_cost
    compute_cost = theano.function([x_p, x_h, target_values], cost)

    # Compute accuracy
    accuracy = T.mean(T.eq(T.argmax(network_output, axis=-1),
                           T.argmax(target_values, axis=-1)),
                      dtype=theano.config.floatX)
    compute_accuracy = theano.function([x_p, x_h, target_values], accuracy)

    label_output = T.argmax(network_output, axis=-1)
    predict = theano.function([x_p, x_h], label_output)

    # Define update/train functions
    all_params = lasagne.layers.get_all_params(l_output, trainable=True)
    updates = lasagne.updates.rmsprop(cost, all_params, learn_rate)
    train = theano.function([x_p, x_h, target_values], cost, updates=updates)

    # TODO: Augment embedding layer to allow for masking inputs

    stats = Stats(exp_name)
    acc_num = 10

    #minibatches = getMinibatchesIdx(val_prem.shape[0], batch_size)
    minibatches = getMinibatchesIdx(train_prem.shape[0], batch_size)
    print("Training ...")
    try:
        total_num_ex = 0
        for epoch in xrange(num_epochs):
            for _, minibatch in minibatches:
                total_num_ex += len(minibatch)
                stats.log("Processed {0} total examples in epoch {1}".format(
                    str(total_num_ex), str(epoch)))

                #prem_batch = val_prem[minibatch]
                #hyp_batch = val_hyp[minibatch]
                #labels_batch = val_labels[minibatch]

                prem_batch = train_prem[minibatch]
                hyp_batch = train_hyp[minibatch]
                labels_batch = train_labels[minibatch]

                train(prem_batch, hyp_batch, labels_batch)
                cost_val = compute_cost(prem_batch, hyp_batch, labels_batch)

                stats.recordCost(total_num_ex, cost_val)
                # Periodically compute and log train/dev accuracy
                if total_num_ex % (acc_num * batch_size) == 0:
                    train_acc = compute_accuracy(train_prem, train_hyp,
                                                 train_labels)
                    dev_acc = compute_accuracy(val_prem, val_hyp, val_labels)
                    stats.recordAcc(total_num_ex, train_acc, dataset="train")
                    stats.recordAcc(total_num_ex, dev_acc, dataset="dev")

    except KeyboardInterrupt:
        pass
Ejemplo n.º 27
0
def build_rotation_cnn(input_var=None):

    # Input layer, as usual:
    network = lasagne.layers.InputLayer(shape=(None, 1, 40, 40),
                                        input_var=input_var)
    # This time we do not apply input dropout, as it tends to work less well
    # for convolutional layers.

    # Convolutional layer with 32 kernels of size 5x5. Strided and padded
    # convolutions are supported as well; see the docstring.
    network = Conv2DLayer(
            network, num_filters=32, filter_size=(5, 5),
            #nonlinearity=lasagne.nonlinearities.sigmoid,
            nonlinearity=lasagne.nonlinearities.rectify,
            W = lasagne.init.Uniform(6.0/64))
    
    #network_middle_output = lasagne.layers.ReshapeLayer(network, shape = (([0], 41472)))
    # Expert note: Lasagne provides alternative convolutional layers that
    # override Theano's choice of which implementation to use; for details
    # please see http://lasagne.readthedocs.org/en/latest/user/tutorial.html.

    # Max-pooling layer of factor 2 in both dimensions:
    network = MaxPool2DLayer(network, pool_size=(2, 2))

    # Another convolution with 32 5x5 kernels, and another 2x2 pooling:
    network = Conv2DLayer(
            network, num_filters=32, filter_size=(5, 5),
            nonlinearity=lasagne.nonlinearities.rectify,
            # W = all_weights[2],
            # b = all_weights[3],
            W = lasagne.init.Uniform(6.0/64)
            #nonlinearity=lasagne.nonlinearities.sigmoid
            )


    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))
    

    nin_layer = Conv2DLayer(
            network, num_filters=32, filter_size=(1, 1),
            nonlinearity=lasagne.nonlinearities.rectify,
            # W = all_weights[2],
            # b = all_weights[3],
            W = lasagne.init.HeNormal()
            #nonlinearity=lasagne.nonlinearities.sigmoid
            )

    
    
    network_middle_output = lasagne.layers.ReshapeLayer(nin_layer, shape = (([0], 1568)))
    #network = Conv2DLayer(
    #        network, num_filters=32, filter_size=(1, 1),
    #        nonlinearity=lasagne.nonlinearities.rectify,
    #        W = lasagne.init.GlorotUniform()
    #        #nonlinearity=lasagne.nonlinearities.sigmoid
    #        )
    #network = Conv2DLayer(
    #        network, num_filters=32, filter_size=(1, 1),
    #        nonlinearity=lasagne.nonlinearities.rectify,
    #        W = lasagne.init.GlorotUniform()
    #        #nonlinearity=lasagne.nonlinearities.sigmoid
    #        )
    
    #network_middle_output = lasagne.layers.NonlinearityLayer(network_middle_output, nonlinearity = lasagne.nonlinearities.sigmoid)

    # A fully-connected layer of 256 units with 50% dropout on its inputs:
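    # NOTE: all_weights is assumed to be a module-level list of pretrained
    # parameter arrays loaded elsewhere in this script.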
    network = lasagne.layers.DenseLayer(
            lasagne.layers.dropout(nin_layer, p=.5),
            #network,
            W = all_weights[4], 
            b = all_weights[5],
            num_units=256,
            #nonlinearity=lasagne.nonlinearities.sigmoid
            nonlinearity=lasagne.nonlinearities.rectify,
            )

    # And, finally, the 10-unit output layer with 50% dropout on its inputs:
    network = lasagne.layers.DenseLayer(
            lasagne.layers.dropout(network, p=.5),
            #network,
            W = all_weights[6],
            b = all_weights[7],
            num_units=10,
            nonlinearity=lasagne.nonlinearities.softmax)

    
    # Weight Decay
    weight_decay_layers = {nin_layer: 0.001}
    l1_penalty = regularize_layer_params_weighted(weight_decay_layers, l1)

    return network, network_middle_output, l1_penalty
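A hedged sketch of inspecting the intermediate representation alongside the class probabilities (variable names are illustrative, and it assumes the module-level all_weights referenced inside the builder is available): get_output accepts a list of layers, so both outputs can be compiled into a single function.

import theano
import theano.tensor as T
import lasagne

input_var = T.tensor4('inputs')
network, network_middle_output, l1_penalty = build_rotation_cnn(input_var)

# probs has shape (N, 10); middle is the flattened 1x1-conv feature map, shape (N, 1568).
probs, middle = lasagne.layers.get_output([network, network_middle_output],
                                          deterministic=True)
predict_fn = theano.function([input_var], [probs, middle])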
Ejemplo n.º 28
0
    def evaluate(self, embedding, train_data, validation_data, test_data, num_classes):

        """

        Evaluates the 'embedding' using a neural network model on a training and validation dataset


        Parameters
        ----------
        embedding      :     An embedding which implements the Embedding interface
        train_data     ;     A tuple of lists (docs, y) that constitutes the training data
        validation_data:     A tuple of lists (docs, y) that constitutes the validation data
        test_data      :     A tuple of lists (docs, y) that constitutes the test data
        Returns        :     A float, with the top validation accuracy achieved
        -------

        """

        # The data
        input_docs_train = train_data[0]
        input_docs_val = validation_data[0]
        input_docs_test = test_data[0]
        Y_train = train_data[1]
        Y_val = validation_data[1]
        Y_test = test_data[1]

        # Fetch embeddings expression and represent the document as a sum of the words
        embeddings_var = embedding.get_embeddings_expr()
        doc_var = embeddings_var.sum(axis=0).dimshuffle('x',0)

        # Create theano symbolic variable for the target labels
        target_var = T.iscalar('target')

        # Build model using lasagne
        l_in = lasagne.layers.InputLayer((1, embedding.d), doc_var)
        l_hid = lasagne.layers.DenseLayer(l_in, num_units=120, nonlinearity=lasagne.nonlinearities.sigmoid)
        l_out = lasagne.layers.DenseLayer(l_hid, num_units=1,
                                          nonlinearity=lasagne.nonlinearities.sigmoid)
        # TODO: support multiclass

        # Create a loss expression for training, i.e., a scalar objective we want
        # to minimize
        prediction = lasagne.layers.get_output(l_out)
        prediction = T.clip(prediction, 1e-7, 1.0 - 1e-7) # Clip to avoid log(0) in the cross-entropy, which would produce NaN
        loss = lasagne.objectives.binary_crossentropy(prediction, target_var).mean()
        l2_penalty = regularize_layer_params_weighted({l_hid: 0.001, l_out: 0.001}, l2)
        loss = loss + l2_penalty

        # Create update expression for training
        params = lasagne.layers.get_all_params(l_out, trainable=True) + embedding.get_update_parameter_vars()
        updates = lasagne.updates.sgd(loss, params, learning_rate=0.01)

        # Create an expression for the classification accuracy:
        test_acc = T.mean(T.eq(T.round(prediction), target_var), dtype=theano.config.floatX)

        # Compile a function performing a training step
        train_fn = theano.function([target_var] + embedding.get_variable_vars(), [loss, test_acc],
                                   updates=updates)

        # Compile a second function computing the validation loss and accuracy:
        val_fn = theano.function([target_var] + embedding.get_variable_vars(), [loss, test_acc])

        # Helper function for iterating the training set
        def iterate_data(input_docs, Y, shuffle=True):
            assert len(input_docs) == len(Y)
            if shuffle:
                indices = np.arange(len(input_docs))
                np.random.shuffle(indices)

            for i in range(len(input_docs)):
                excerpt = indices[i] if shuffle else i
                yield input_docs[excerpt], Y[excerpt]

        ## Perform the training
        patience = 20  # minimum epochs
        patience_increase = 2     # wait this much longer when a new best is found
        best_validation_loss = 0  # best validation accuracy seen so far
        best_test_acc = 0.0
        improvement_threshold = 0.999  # a relative improvement of this much is considered significant
        print("Starting training...")
        for epoch in range(self.num_epochs):

            # Time it !
            start_time = time.time()

            # In each epoch, we do a full pass over the training data:
            train_err = 0
            train_acc = 0
            train_count = 0
            for doc, y in iterate_data(input_docs_train, Y_train, shuffle=False):

                words = doc.split(" ")
                if not any([embedding.has(word) for word in words]): continue # If no embeddings, skip this doc

                err, acc = train_fn(y, *embedding.get_variables(words))
                train_err += err
                train_acc += acc
                train_count += 1

                sys.stdout.write("\r" + "total train acc: \t{:.2f}".format(train_acc * 100 / train_count))

            # And a full pass over the validation data data again:
            val_err = 0
            val_acc = 0
            val_count = 0
            for doc, y in iterate_data(input_docs_val, Y_val, shuffle=False):

                words = doc.split(" ")
                if not any([embedding.has(word) for word in words]): continue # If no embeddings, skip this doc

                err, acc = val_fn(y, *embedding.get_variables(words))
                val_err += err
                val_acc += acc
                val_count += 1

            # Then we print the results for this epoch:
            sys.stdout.write("\r" + "Epoch {} of {} took {:.3f}s \n".format(
                epoch + 1, self.num_epochs, time.time() - start_time))
            print("  training accuracy:\t\t{:.2f} %".format( train_acc / train_count * 100))
            print("  validation accuracy:\t\t{:.2f} %".format( val_acc / val_count * 100))

            # Early stopping: extend patience while validation accuracy keeps improving; stop once patience runs out
            if val_acc > best_validation_loss:

                # improve patience if loss improvement is good enough
                if val_acc > best_validation_loss * 1.0/improvement_threshold:
                    patience = max(patience, epoch * patience_increase)

                # We have a new peak validation accuracy, evaluate on test set
                best_validation_loss = val_acc
                test_err = 0
                test_acc = 0
                test_count = 0
                for doc, y in iterate_data(input_docs_test, Y_test, shuffle=False):

                    words = doc.split(" ")
                    if not any([embedding.has(word) for word in words]): continue # If no embeddings, skip this doc

                    err, acc = val_fn(y, *embedding.get_variables(words))
                    test_err += err
                    test_acc += acc
                    test_count += 1

                best_test_acc = test_acc / test_count
                print("  test accuracy:\t\t{:.2f} %".format( best_test_acc * 100))

            if patience <= epoch:
                break

        return best_test_acc
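For reference, a minimal sketch of the data layout this method expects: each split is a (documents, labels) pair, where documents are plain strings that evaluate() splits on spaces and labels are one integer (0 or 1) per document for the single sigmoid output unit. The contents and the evaluator/embedding objects below are illustrative only.

train_data = (["a great movie", "utterly boring plot"], [1, 0])
validation_data = (["quite enjoyable"], [1])
test_data = (["not worth watching"], [0])
# accuracy = evaluator.evaluate(embedding, train_data, validation_data, test_data, num_classes=2)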
Ejemplo n.º 29
0
def build_mlp(input_var):
    l_in=lasagne.layers.InputLayer(shape=(None,2),input_var=input_var)  # InputLayer holds no weights, so no W argument is needed
    l_hid1 = lasagne.layers.DenseLayer(l_in, num_units=4,nonlinearity=lasagne.nonlinearities.sigmoid)
    l_out = lasagne.layers.DenseLayer(l_hid1, num_units=2,nonlinearity=lasagne.nonlinearities.sigmoid)
    return l_out
    
input_var = T.fmatrix('inputs')
target_var = T.ivector('targets')
network = build_mlp(input_var)
prediction = lasagne.layers.get_output(network)  # non-deterministic pass for training

loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
loss = loss.mean()
layers = {network: 0.002}  # weight the penalty on the network being trained, not a fresh copy
l2_penalty = regularize_layer_params_weighted(layers, l2)
loss = loss + l2_penalty  # the regularization penalty is added to the loss

params = lasagne.layers.get_all_params(network, trainable=True)
updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=0.07, momentum=0.9)

test_prediction = lasagne.layers.get_output(network, deterministic=True)
test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,target_var)
test_loss = test_loss.mean() + l2_penalty

test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),dtype=theano.config.floatX)

pred=T.eq(T.argmax(test_prediction, axis=1), target_var)

train_fn = theano.function([input_var, target_var], loss, updates=updates)
val_fn = theano.function([input_var, target_var], [test_acc])
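A hedged sketch of exercising the compiled functions above on toy data (the dataset is illustrative only and matches the (None, 2) input shape and integer targets expected by input_var and target_var):

import numpy as np

X = np.random.rand(200, 2).astype('float32')
y = (X[:, 0] > X[:, 1]).astype('int32')  # label 1 when the first feature dominates

for epoch in range(100):
    train_fn(X, y)

print("accuracy:", val_fn(X, y)[0])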
Ejemplo n.º 30
0
def main(model=MODEL,gradient = GRADIENT, num_epochs=NUM_EPOCHS, num_hidden_units = NUM_HIDDEN_UNITS, bnalg = BNALG, lr_start = LR_START):

    # Set the Initial Learning Rate; the Final Learning Rate and the number of training epochs
    LR_start= lr_start
    LR_fin = 0.01
    epochs=num_epochs
    # LR_decay = (LR_fin/LR_start)**(1./epochs)
    LR_decay = 1

    print("Generating the ImageDataGenerator")
    #Define the Image Data Generator, which is used for real-time data augmentation while training
    datagen = ImageDataGenerator(
              featurewise_center=False,  # set input mean to 0 over the dataset
              samplewise_center=False,  # set each sample mean to 0
              featurewise_std_normalization=False,  # divide inputs by std of the dataset
              samplewise_std_normalization=False,  # divide each input by its std
              zca_whitening=False,  # apply ZCA whitening
              rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
              width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
              height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
              horizontal_flip=True,  # randomly flip images
              vertical_flip=False)  # randomly flip images

    #Define Theano tensor variables for input, the labels and the learning rate
    input=T.tensor4('input')
    target=T.ivector('target')
    LR = T.scalar('LR', dtype=theano.config.floatX)

    #Define the Network
    print("Generating the cnn network")
    net=cnn_network(input, bnalg)

    #Define Training Output Variables

    print("Compiling the functions")

    train_output=lasagne.layers.get_output(net['l_out'],input,deterministic=False) ## Get the class probabilities
    train_pred=train_output.argmax(-1) ## Get the predicted class label
    train_loss=T.mean(lasagne.objectives.categorical_crossentropy(train_output,target)) #Using Cross-Entropy Loss
    train_err=T.mean(T.neq(T.argmax(train_output,axis=1), target),dtype=theano.config.floatX) #Compute the mean training prediction error

    # Define Validation Output Variables

    val_output=lasagne.layers.get_output(net['l_out'],input,deterministic=True)
    val_loss=T.mean(lasagne.objectives.categorical_crossentropy(val_output,target))
    val_err = T.mean(T.neq(T.argmax(val_output,axis=1), target),dtype=theano.config.floatX)
    val_pred=val_output.argmax(-1)

    # Set L2 regularization coefficient
    layers={}
    for k in net.keys():
        layers[net[k]]=0.0005

    l2_penalty = regularize_layer_params_weighted(layers, l2)
    train_loss=train_loss+l2_penalty

    #Define the Gradient Update Rule
    print("Compiling the functions: extract params")

    params = lasagne.layers.get_all_params(net['l_out'], trainable=True) #Get list of all trainable network parameters
    # bnparams = lasagne.layers.get_all_params(net['l_out'], trainable=False) #Get list of all BN untrainable network parameters

    if gradient == "adagrad":
        updates = lasagne.updates.adagrad(loss_or_grads=train_loss, params=params, learning_rate=LR) 
        ## Use Adagrad Gradient Descent Learning Algorithm
    elif gradient == "rmsprop":
        updates = lasagne.updates.rmsprop(loss_or_grads=train_loss, params=params, learning_rate=LR) 
    elif gradient == "sgd":
        updates = lasagne.updates.sgd(loss_or_grads=train_loss, params=params, learning_rate=LR) 
    else:
        raise ValueError("Invalid gradient name")

    #Define Theano Functions for Training and Validation

    #Theano Function for Training
    print("Compiling the functions: define train function")

    f_train=theano.function([input,target,LR],[train_loss,train_err],updates=updates,allow_input_downcast=True)

    # Theano Function for Validation
    
    print("Compiling the functions: define val function")
    f_val=theano.function([input,target],[val_loss,val_err],allow_input_downcast=True)

    # f_get_params=theano.function([LR],[bnparams],allow_input_downcast=True)

    #Begin Training
    print("Beging Training")
    train_stats,val_stats,per_epoch_params=batch_train(datagen,f_train,f_val,net['l_out'],LR_start,LR_decay,epochs=epochs,\
        data_dir="../../data/cifar-10-batches-py/",train_bool=True)

    #output data
    list_epoch      = [i[0] for i in val_stats]
    list_val_loss   = [i[1] for i in val_stats]
    list_val_err    = [i[2] for i in val_stats]
    list_val_acc    = [1-i  for i in list_val_err]

    epoch_mu        = [i[0] for i in per_epoch_params]
    epoch_lambda    = [i[1] for i in per_epoch_params]
    
    # epoch_params_mu = [i[0] for i in epoch_params]
    # epoch_params_std= [i[1] for i in epoch_params]

    np.savetxt(OUTPUT_DATA_PATH+model+"_"+gradient+"_"+str(num_epochs)+"_"+bnalg+"_"+"epoch.txt",list_epoch)
    np.savetxt(OUTPUT_DATA_PATH+model+"_"+gradient+"_"+str(num_epochs)+"_"+bnalg+"_"+"loss_val.txt",list_val_loss)
    np.savetxt(OUTPUT_DATA_PATH+model+"_"+gradient+"_"+str(num_epochs)+"_"+bnalg+"_"+"acc_val.txt",list_val_acc)
    np.savetxt(OUTPUT_DATA_PATH+model+"_"+gradient+"_"+str(num_epochs)+"_"+bnalg+"_"+"err_val.txt",list_val_err)
    np.savetxt(OUTPUT_DATA_PATH+model+"_"+gradient+"_"+str(num_epochs)+"_"+bnalg+"_"+"params_mu.txt",epoch_mu)
    np.savetxt(OUTPUT_DATA_PATH+model+"_"+gradient+"_"+str(num_epochs)+"_"+bnalg+"_"+"params_std.txt",epoch_lambda)
    print ("Data saved...")
def build_cnn(input_var=None, batch_size = None, class_num=10):

    # Input layer, as usual:
    l_in = lasagne.layers.InputLayer(shape=(batch_size, 1, 40, 40),
                                        input_var=input_var)

    loc_network_list = []
    for i in range(class_num):
        loc_l1 = MaxPool2DLayer(l_in, pool_size=(2, 2))
        loc_l2 = Conv2DLayer(
            loc_l1, num_filters=20, filter_size=(5, 5), W=lasagne.init.HeUniform('relu'), name = "loc_l2_%d" %i)
        loc_l3 = MaxPool2DLayer(loc_l2, pool_size=(2, 2))
        loc_l4 = Conv2DLayer(loc_l3, num_filters=20, filter_size=(5, 5), W=lasagne.init.HeUniform('relu'), name = "loc_l4_%d" %i)
        loc_l5 = lasagne.layers.DenseLayer(
            loc_l4, num_units=50, W=lasagne.init.HeUniform('relu'), name = "loc_l5_%d" %i)
        loc_out = lasagne.layers.DenseLayer(
            loc_l5, num_units=1, W=lasagne.init.Constant(0.0), 
            nonlinearity=lasagne.nonlinearities.identity, name = "loc_out_%d" %i)
        # Transformer network
        l_trans1 = RotationTransformationLayer(l_in, loc_out)
        print "Transformer network output shape: ", l_trans1.output_shape
        loc_network_list.append(l_trans1)
    network_transformed = lasagne.layers.ConcatLayer(loc_network_list, axis = 1)

    network_transformed = lasagne.layers.ReshapeLayer(network_transformed, (-1, 1, 40, 40))

    conv_1 = Conv2DLayer(
        network_transformed, num_filters=32, filter_size=(5, 5),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotUniform())

    # Max-pooling layer of factor 2 in both dimensions:
    network = MaxPool2DLayer(conv_1, pool_size=(2, 2))

    # Another convolution with 32 5x5 kernels, and another 2x2 pooling:
    conv_2 = Conv2DLayer(
            network, num_filters=32, filter_size=(5, 5),
            nonlinearity=lasagne.nonlinearities.rectify,
            W = lasagne.init.GlorotUniform()
            #nonlinearity=lasagne.nonlinearities.sigmoid
            )
    network = lasagne.layers.MaxPool2DLayer(conv_2, pool_size=(2, 2))

    # A fully-connected layer of 256 units with 50% dropout on its inputs:
    fc1  = lasagne.layers.DenseLayer(
            lasagne.layers.dropout(network, p=.5),
            #network,
            num_units=256,
            #nonlinearity=lasagne.nonlinearities.sigmoid
            nonlinearity=lasagne.nonlinearities.rectify,
            )

    # And, finally, the 10-unit output layer with 50% dropout on its inputs:
    fc2  = lasagne.layers.DenseLayer(
            lasagne.layers.dropout(fc1, p=.5),
            nonlinearity=lasagne.nonlinearities.identity,
            num_units=10,
            )

    fc2_selected = SelectLayer(fc2, 10)
    # fc2_selected = lasagne.layers.NonlinearityLayer(fc2_selected, nonlinearity=lasagne.nonlinearities.softmax)

    #network_transformed = lasagne.layers.ReshapeLayer(network_transformed, (-1, 10, 10, 40, 40))

    weight_decay_layers = {fc1:0.0, fc2:0.002}
    l2_penalty = regularize_layer_params_weighted(weight_decay_layers, l2)

    return fc2_selected, l2_penalty, network_transformed, [conv_1, conv_2, fc1, fc2] 
Ejemplo n.º 32
0
def sgd_optimization(NNInput):
    """
    Demonstrate stochastic gradient descent optimization of a log-linear model

    :type LearningRate: float
    :param LearningRate: learning rate used (factor for the stochastic gradient)

    :type NEpoch: int
    :param NEpoch: maximal number of epochs to run the optimizer

    :type PathToData: string
    :param PathToData: the path of the dataset file

    """

    def normalized_squared_error(a, b, expon):
        """Computes the element-wise squared normalized difference between two tensors.
        .. math:: L = ( (p - t) / t )^2
        Parameters
        ----------
        a, b : Theano tensor
            The tensors to compute the squared difference between.
        Returns
        -------
        Theano tensor
            An expression for the item-wise squared difference.
        """
        a, b = align_targets(a, b)
        return T.square((a - b) / T.abs_(b)**expon)
        # / T.abs_(TargetVar)**(0.0) / T.abs_(b)**expon



    def weighted_squared_error(a, b, Shift, Power):
        """Computes the element-wise squared normalized difference between two tensors.
        .. math:: L = ( (p - t) / t )^2
        Parameters
        ----------
        a, b : Theano tensor
            The tensors to compute the squared difference between.
        Returns
        -------
        Theano tensor
            An expression for the item-wise squared difference.
        """
        a, b = align_targets(a, b)
        Vi   = T.maximum(b, Shift)
        w    = T.power(Shift/b, Power)
        return w * T.square(a - b)
        # / T.abs_(TargetVar)**(0.0) / T.abs_(b)**expon



    def align_targets(predictions, targets):
        """Helper function turning a target 1D vector into a column if needed.
        This way, combining a network of a single output unit with a target vector
        works as expected by most users, not broadcasting outputs against targets.
        Parameters
        ----------
        predictions : Theano tensor
            Expression for the predictions of a neural network.
        targets : Theano tensor
            Expression or variable for corresponding targets.
        Returns
        -------
        predictions : Theano tensor
            The predictions unchanged.
        targets : Theano tensor
            If `predictions` is a column vector and `targets` is a 1D vector,
            returns `targets` turned into a column vector. Otherwise, returns
            `targets` unchanged.
        """
        if (getattr(predictions, 'broadcastable', None) == (False, True) and
                getattr(targets, 'ndim', None) == 1):
            targets = as_theano_expression(targets).dimshuffle(0, 'x')
        return predictions, targets


    ##################################################################################################################################
    ### LOADING DATA
    ##################################################################################################################################
    print('\nLoading Data ... \n')

    if (NNInput.TryNNFlg > 0):
        datasets, datasetsTry, G_MEAN, G_SD, RDataOrig, yDataOrig, yDataDiatOrig = load_data(NNInput)
    else:
        datasets, G_MEAN, G_SD, RDataOrig, yDataOrig, yDataDiatOrig = load_data(NNInput)

    RSetTrain, GSetTrain, ySetTrain, ySetTrainDiat, ySetTrainTriat = datasets[0]
    RSetValid, GSetValid, ySetValid, ySetValidDiat, ySetValidTriat = datasets[1]
    #RSetTest,  GSetTest,  ySetTest,  ySetTestDiat,  ySetTestTriat  = datasets[2]


    #plot_set(NNInput, RSetTrain.get_value(), ySetTrainDiat.get_value(), RSetValid.get_value(), ySetValidDiat.get_value(), RSetTest.get_value(), ySetTestDiat.get_value())
    
    NNInput.NIn  = RSetTrain.get_value(borrow=True).shape[1]
    NNInput.NOut = ySetTrain.get_value(borrow=True).shape[1] 
    print(('    Nb of Input:  %i')    % NNInput.NIn)
    print(('    Nb of Output: %i \n') % NNInput.NOut)
    if (NNInput.Model=='ModPIP') or (NNInput.Model=='PIP'):
        NNInput.NLayers = NNInput.NHid
        NNInput.NLayers.insert(0,NNInput.NIn)
        NNInput.NLayers.append(NNInput.NOut)

    NTrain      = RSetTrain.get_value(borrow=True).shape[0]
    NBatchTrain = NTrain // NNInput.NMiniBatch
    NValid      = RSetValid.get_value(borrow=True).shape[0]
    #NTest       = RSetTest.get_value(borrow=True).shape[0]
    print(('    Nb of Training   Examples: %i')    % NTrain)
    print(('    Nb of Training   Batches:  %i') % NBatchTrain)
    print(('    Nb of Validation Examples: %i')    % NValid)
    #print(('    Nb of Test       Examples: %i \n') % NTest)



    ######################
    # BUILD ACTUAL MODEL #
    ######################  
    InputVar  = T.dmatrix('Inputs')
    #InputVar.tag.test_value  = numpy.random.randint(100,size=(100,3))
    InputVar.tag.test_value  = numpy.array([[1.0,2.0,7.0],[3.0,5.0,11.0]]) * 0.529177
    TargetVar = T.dmatrix('Targets')
    #TargetVar.tag.test_value = numpy.random.randint(100,size=(100,1))


    Layers = create_nn(NNInput, InputVar, TargetVar)

    TrainPrediction  = lasagne.layers.get_output(Layers[-1])
    if (NNInput.LossFunction == 'squared_error'):
        TrainError       = T.sqr(TrainPrediction - TargetVar)
        TrainLoss        = lasagne.objectives.squared_error(TrainPrediction, TargetVar)
    elif (NNInput.LossFunction == 'normalized_squared_error'):
        TrainError       = T.abs_( (TrainPrediction - TargetVar) / T.abs_(TargetVar)**NNInput.OutputExpon)
        TrainLoss        = normalized_squared_error(TrainPrediction, TargetVar, NNInput.OutputExpon)
    elif (NNInput.LossFunction == 'huber_loss'):
        TrainError       = T.abs_( (TrainPrediction - TargetVar) )
        TrainLoss        = lasagne.objectives.huber_loss(TrainPrediction, TargetVar, delta=5)
    elif (NNInput.LossFunction == 'weighted_squared_error'):
        TrainError       = T.abs_( (TrainPrediction - TargetVar) )
        TrainLoss        = weighted_squared_error(TrainPrediction, TargetVar, NNInput.Shift, NNInput.Power)

    if (NNInput.Model == 'ModPIP'):
        LayersK          = {Layers[2]: 1.0, Layers[3]: 1.0}
    elif (NNInput.Model=='ModPIPPol'):
        LayersK          = {Layers[1]: 1.0}
    elif (NNInput.Model=='PIP'):
        LayersK          = {Layers[0]: 1.0, Layers[1]: 1.0}
    L2Penalty        = regularize_layer_params_weighted(LayersK, l2)
    L1Penalty        = regularize_layer_params_weighted(LayersK, l1)
    #TrainLoss        = TrainLoss
    TrainLoss        = TrainLoss.mean() + NNInput.kWeightDecay[0] * L1Penalty + NNInput.kWeightDecay[1] * L2Penalty

    params           = lasagne.layers.get_all_params(Layers[-1], trainable=True)
    if (NNInput.Method == 'nesterov'):
        updates          = lasagne.updates.nesterov_momentum(TrainLoss, params, learning_rate=NNInput.LearningRate, momentum=NNInput.kMomentum)
    elif (NNInput.Method == 'rmsprop'):
        updates          = lasagne.updates.rmsprop(TrainLoss, params, learning_rate=NNInput.LearningRate, rho=NNInput.RMSProp[0], epsilon=1e-06)
    elif (NNInput.Method == 'adamax'):
        updates          = lasagne.updates.adamax(TrainLoss, params, learning_rate=NNInput.LearningRate, beta1=0.9, beta2=0.999, epsilon=1e-08)
    elif (NNInput.Method == 'amsgrad'):
        updates          = lasagne.updates.amsgrad(TrainLoss, params, learning_rate=NNInput.LearningRate, beta1=0.9, beta2=0.999, epsilon=1e-08)
    elif (NNInput.Method == 'adam'):
        updates          = lasagne.updates.adam(TrainLoss, params, learning_rate=NNInput.LearningRate, beta1=0.9, beta2=0.999, epsilon=1e-08)
    elif (NNInput.Method == 'adadelta'):
        updates          = lasagne.updates.adadelta(TrainLoss, params, learning_rate=NNInput.LearningRate, rho=0.95, epsilon=1e-08)
    TrainFn = theano.function(inputs=[InputVar, TargetVar], outputs=[TrainError, TrainLoss], updates=updates)


    ValidPrediction = lasagne.layers.get_output(Layers[-1], deterministic=True)

    if (NNInput.LossFunction == 'squared_error'):
        ValidError      = T.sqr(ValidPrediction - TargetVar)
    elif (NNInput.LossFunction == 'normalized_squared_error'):
        ValidError      = T.sqr((ValidPrediction - TargetVar) / TargetVar)
        ValidError      = T.sqrt(ValidError.mean())
    elif (NNInput.LossFunction == 'huber_loss'):
        ValidError      = T.sqr(ValidPrediction - TargetVar)
        ValidError      = T.sqrt(ValidError.mean())
    elif (NNInput.LossFunction == 'weighted_squared_error'):
        Vi              = T.maximum(ValidPrediction, NNInput.Shift)
        w               = T.power(NNInput.Shift/TargetVar, NNInput.Power)
        ValidError      = w * T.sqr(ValidPrediction - TargetVar)
        ValidError      = T.sqrt(ValidError.mean())
    ValFn   = theano.function(inputs=[InputVar, TargetVar], outputs=ValidError)


    ###############
    # TRAIN MODEL #
    ###############
    print('\n\nTRAINING ... ')

    if (NNInput.fvalid < 0):
        fValid = NBatchTrain * numpy.absolute(NNInput.fvalid)
    else:
        fValid = NNInput.fvalid
    BestValidError        = numpy.inf
    BestIter              = 0
    TestScore             = 0.
    tStart                = timeit.default_timer()
    iEpoch                = 0
    LoopingFlg            = True
    iIterTot              = 0
    Train                 = []
    TrainEpochVec         = []
    Valid                 = []
    ValidEpochVec         = []
    iTry                  = 0

   
    if (NNInput.Model=='ModPIP') or (NNInput.Model == 'ModPIPPol'):
        xSetTrain = RSetTrain
        xSetValid = RSetValid
        #xSetTest  = RSetTest
        xDataOrig = RDataOrig
    elif (NNInput.Model == 'PIP'):
        xSetTrain = GSetTrain
        xSetValid = GSetValid
        #xSetTest  = GSetTest
        #xDataOrig = GDataOrig
    # print(xSetTrain)
    # print(xSetValid)
    # print(xSetTest)
    # print(xDataOrig)
    # print(ySetTrain.get_value())
    # print(ySetValid.get_value())
    # print(ySetTest.get_value())
    # print(yDataOrig)
    # time.sleep(5)

    ThisTrainError = 0.0
    while (iEpoch < NNInput.NEpoch) and (LoopingFlg):
        iEpoch += 1

        iMiniBatch    = 0
        TrainErrorVec = [] 
        for TrainBatch in iterate_minibatches(xSetTrain, ySetTrain, NNInput.NMiniBatch, shuffle=True):
            iMiniBatch += 1
            iIterTot    = (iEpoch - 1) * NBatchTrain + iMiniBatch
            TrainInputs, TrainTargets          = TrainBatch
            [TrainErrorTemp, MiniBatchAvgCost] = TrainFn(TrainInputs, TrainTargets)
            TrainErrorVec                      = numpy.append(TrainErrorVec, TrainErrorTemp)


            if (iIterTot + 1) % fValid == 0:

                ValidErrorVec = []
                for ValidBatch in iterate_minibatches(xSetValid, ySetValid, NValid, shuffle=False):
                    ValidInputs, ValidTargets = ValidBatch
                    ValidErrorVec             = numpy.append(ValidErrorVec, ValFn(ValidInputs, ValidTargets))

                ThisValidError = numpy.sqrt( numpy.mean(ValidErrorVec) )
                ValidEpochVec  = numpy.append(ValidEpochVec, iEpoch)
                Valid          = numpy.append(Valid, ThisValidError)

                # fig = plt.figure()
                # plt.plot(ValidErorrVec, color='lightblue', linewidth=3)
                # #ax.set_xlim(,)
                # plt.show()

                print( '\n    iEpoch %i, minibatch %i/%i, training error %f, validation error %f' % (iEpoch, iMiniBatch, NBatchTrain, ThisTrainError, ThisValidError) )

                # if we got the best validation score until now
                if ThisValidError < BestValidError:
                    #improve patience if loss improvement is good enough
                    #if (ThisValidError < BestValidError * NNInput.ImpThold):
                        # NNInput.NPatience = max(NNInput.NPatience, iIterTot * NNInput.NDeltaPatience)

                    BestValidError = ThisValidError
                    BestIter       = iIterTot

                    # # test it on the test set
                    # TestErrorVec = []
                    # for TestBatch in iterate_minibatches(xSetTest, ySetTest, NTest, shuffle=False):
                    #     TestInputs, TestTargets = TestBatch
                    #     TestErrorVec = numpy.append(TestErrorVec, ValFn(TestInputs, TestTargets))
                    # TestScore  = numpy.mean(TestErrorVec)

                    # print(('        iEpoch %i, minibatch %i/%i, test error of best model %f') % (iEpoch, iMiniBatch + 1, NBatchTrain, TestScore))
                    print('        iEpoch %i, minibatch %i/%i, Best so far' % (iEpoch, iMiniBatch, NBatchTrain))


                    if (NNInput.WriteFinalFlg > 0):
                        
                        for iLayer in range(len(NNInput.NLayers)-1):

                            PathToFldr = NNInput.PathToOutputFldr + Layers[iLayer].name + '/'
                            if not os.path.exists(PathToFldr):
                                os.makedirs(PathToFldr)
                            PathToFile = PathToFldr + 'Weights.npz'
                            numpy.savez(PathToFile, *lasagne.layers.get_all_param_values(Layers[iLayer]))

                            if (NNInput.WriteFinalFlg > 1):
                                if (NNInput.Model == 'ModPIP'):
                                    if (iLayer == 0) and (NNInput.BondOrderStr != 'DiatPotFun'):
                                        save_parameters_PIP(PathToFldr, Layers[iLayer].Lambda.get_value(), Layers[iLayer].re.get_value())
                                    elif (iLayer > 1):
                                        if (NNInput.BiasesFlg):
                                            save_parameters(PathToFldr, Layers[iLayer].W.get_value(), Layers[iLayer].b.get_value())
                                        else:
                                            save_parameters_NoBiases(PathToFldr, Layers[iLayer].W.get_value())
                                elif (NNInput.Model == 'ModPIPPol'):
                                    if (iLayer == 0) and (NNInput.BondOrderStr != 'DiatPotFun'):
                                        save_parameters_PIP(PathToFldr, Layers[iLayer].Lambda.get_value(), Layers[iLayer].re.get_value())
                                    elif (iLayer==1):
                                        save_parameters_NoBiases(PathToFldr, Layers[iLayer].W.get_value())
                                elif (NNInput.Model == 'PIP'):
                                    if (NNInput.BiasesFlg):
                                        save_parameters(PathToFldr, Layers[iLayer].W.get_value(), Layers[iLayer].b.get_value())
                                    else:
                                        save_parameters_NoBiases(PathToFldr, Layers[iLayer].W.get_value())


                        if (NNInput.TryNNFlg > 1):
                            i=-1
                            for Ang in NNInput.AngVector:
                                i=i+1
                                iTry=iTry+1
                                RSetTry, GSetTry, ySetTry, ySetTryDiat, ySetTryTriat  = datasetsTry[i]
                                if (NNInput.Model == 'ModPIP') or (NNInput.Model == 'ModPIPPol'):
                                    xSetTry = RSetTry
                                elif (NNInput.Model == 'PIP'):
                                    xSetTry = GSetTry
                                NTry                  = xSetTry.get_value(borrow=True).shape[0]
                                NBatchTry             = NTry // NNInput.NMiniBatch
                                yPredTry = lasagne.layers.get_output(Layers[-1], inputs=xSetTry) 
                                if  (NNInput.TryNNFlg > 2):
                                    PathToTryLabels = NNInput.PathToOutputFldr + '/REBestDet.csv.' + str(iTry)
                                else:
                                    PathToTryLabels = NNInput.PathToOutputFldr + '/REBestDet.csv.' + str(Ang)
                                yPredTry = T.cast(yPredTry, 'float64')
                                yPredTry = yPredTry.eval()
                                yPredTry = InverseTransformation(NNInput, yPredTry, ySetTryDiat.get_value())
                                ySetTry = T.cast(ySetTry, 'float64')
                                ySetTry = ySetTry.eval()
                                ySetTry = InverseTransformation(NNInput, ySetTry, ySetTryDiat.get_value())
                                save_to_plot(PathToTryLabels, 'Evaluated', numpy.concatenate((RSetTry.get_value(), ySetTry, yPredTry), axis=1))
                    
        TrainEpochVec   = numpy.append(TrainEpochVec, iEpoch)
        ThisTrainError  = numpy.sqrt( numpy.mean(TrainErrorVec) )
        Train           = numpy.append(Train, ThisTrainError)


    #############################################################################################################
    ### LOADING THE OPTIMAL PARAMETERS
    for iLayer in range(len(NNInput.NLayers)-1):

        PathToFldr = NNInput.PathToWeightFldr + Layers[iLayer].name + '/'
        print(' Loading Parameters for Layer ', iLayer, ' from File ', PathToFldr)
        if (NNInput.Model == 'ModPIP'):
            if (iLayer == 0) and (NNInput.BondOrderStr != 'DiatPotFun'):
                save_parameters_PIP(PathToFldr, Layers[iLayer].Lambda.get_value(), Layers[iLayer].re.get_value())
            elif (iLayer > 1):
                if (NNInput.BiasesFlg):
                    save_parameters(PathToFldr, Layers[iLayer].W.get_value(), Layers[iLayer].b.get_value())
                else:
                    save_parameters_NoBiases(PathToFldr, Layers[iLayer].W.get_value())
        elif (NNInput.Model == 'ModPIPPol'):
            if (iLayer == 0) and (NNInput.BondOrderStr != 'DiatPotFun'):
                save_parameters_PIP(PathToFldr, Layers[iLayer].Lambda.get_value(), Layers[iLayer].re.get_value())
            elif (iLayer==1):
                save_parameters_NoBiases(PathToFldr, Layers[iLayer].W.get_value())
        elif (NNInput.Model == 'PIP'):
            save_parameters(PathToFldr, Layers[iLayer].W.get_value(), Layers[iLayer].b.get_value())


    #############################################################################################################
    ### Evaluating Model for a Particular Data-Set
    if (NNInput.TryNNFlg > 0):
        i=-1
        for Ang in NNInput.AngVector:
            i=i+1
            RSetTry, GSetTry, ySetTry, ySetTryDiat, ySetTryTriat  = datasetsTry[i]
            if (NNInput.Model == 'ModPIP') or (NNInput.Model == 'ModPIPPol'):
                xSetTry = RSetTry
            elif (NNInput.Model == 'PIP'):
                xSetTry = GSetTry
            NTry                  = xSetTry.get_value(borrow=True).shape[0]
            NBatchTry             = NTry // NNInput.NMiniBatch
            yPredTry = lasagne.layers.get_output(Layers[-1], inputs=xSetTry) 
            PathToTryLabels = NNInput.PathToOutputFldr + '/REBestDet.csv.' + str(Ang)
            yPredTry = T.cast(yPredTry, 'float64')
            yPredTry = yPredTry.eval()
            yPredTry = InverseTransformation(NNInput, yPredTry, ySetTryDiat.get_value())
            ySetTry = T.cast(ySetTry, 'float64')
            ySetTry = ySetTry.eval()
            ySetTry = InverseTransformation(NNInput, ySetTry, ySetTryDiat.get_value())
            save_to_plot(PathToTryLabels, 'Evaluated', numpy.concatenate((RSetTry.get_value(), ySetTry, yPredTry), axis=1))


    #############################################################################################################
    ### COMPUTING ERRORS
    ySetTrain = InverseTransformation(NNInput, ySetTrain.get_value(), ySetTrainDiat.get_value())
    ySetValid = InverseTransformation(NNInput, ySetValid.get_value(), ySetValidDiat.get_value())
    #ySetTest  = InverseTransformation(NNInput, ySetTest.get_value(),  ySetTestDiat.get_value())

    yPredTrain  = lasagne.layers.get_output(Layers[-1], inputs=xSetTrain) 
    yPredTrain  = T.cast(yPredTrain, 'float64')
    yPredTrain  = yPredTrain.eval()
    yPredTrain  = InverseTransformation(NNInput, yPredTrain, ySetTrainDiat.get_value())
    error_Train = ySetTrain - yPredTrain 
    plot_error(NNInput, error_Train, 'Train')

    yPredValid  = lasagne.layers.get_output(Layers[-1], inputs=xSetValid) 
    yPredValid  = T.cast(yPredValid, 'float64')
    yPredValid  = yPredValid.eval()
    yPredValid  = InverseTransformation(NNInput, yPredValid, ySetValidDiat.get_value())
    error_Valid = ySetValid - yPredValid
    plot_error(NNInput, error_Valid, 'Valid') 

    # yPredTest   = lasagne.layers.get_output(Layers[-1], inputs=xSetTest) 
    # yPredTest   = T.cast(yPredTest, 'float64')
    # yPredTest   = yPredTest.eval()
    # yPredTest   = InverseTransformation(NNInput, yPredTest, ySetTestDiat.get_value())
    # error_Test  = ySetTest - yPredTest
    # plot_error(NNInput, error_Test, 'Test')

    # plot_set(NNInput, RSetTrain.get_value(), ySetTrain, RSetValid.get_value(), ySetValid, RSetTest.get_value(), ySetTest)


    yPredOrig   = lasagne.layers.get_output(Layers[-1], inputs=xDataOrig) 
    yPredOrig   = T.cast(yPredOrig, 'float64')
    yPredOrig   = yPredOrig.eval()
    yPredOrig   = InverseTransformation(NNInput, yPredOrig, yDataDiatOrig)
    plot_scatter(NNInput, yPredOrig, yDataOrig)
    #plot_overall_error(NNInput, yPredOrig, yDataOrig)

    plot_history(NNInput, TrainEpochVec, Train, ValidEpochVec, Valid)


    tEnd = timeit.default_timer()
    print(('\nOptimization complete. Best validation score of %f obtained at iteration %i, with test performance %f') % (BestValidError, BestIter + 1, TestScore))
    print(('\nThe code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((tEnd - tStart) / 60.)), file=sys.stderr)
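For reference, a hedged sketch of the configuration object this routine expects. The field list is reconstructed from the attribute accesses above; the concrete values are placeholders, not defaults from the source.

# Hypothetical NNInput configuration; every value below is a placeholder
# inferred from how sgd_optimization reads the object, not a source default.
class NNInput:
    Model            = 'ModPIP'           # 'ModPIP', 'ModPIPPol' or 'PIP'
    LossFunction     = 'squared_error'    # or 'normalized_squared_error', 'huber_loss', 'weighted_squared_error'
    Method           = 'adam'             # 'nesterov', 'rmsprop', 'adamax', 'amsgrad', 'adam', 'adadelta'
    LearningRate     = 1.0e-3
    kMomentum        = 0.9                # used by 'nesterov'
    RMSProp          = [0.9]              # rho used by 'rmsprop'
    kWeightDecay     = [0.0, 1.0e-4]      # [L1 coefficient, L2 coefficient]
    NHid             = [20, 20]           # hidden-layer sizes; NLayers is derived from this
    NEpoch           = 1000
    NMiniBatch       = 64
    fvalid           = -1                 # < 0: validate every |fvalid| epochs' worth of batches
    OutputExpon      = 1.0                # exponent in normalized_squared_error
    Shift            = 1.0                # reference value in weighted_squared_error
    Power            = 1.0                # exponent in weighted_squared_error
    TryNNFlg         = 0
    WriteFinalFlg    = 0
    BiasesFlg        = True
    BondOrderStr     = ''
    AngVector        = []
    PathToOutputFldr = './Output/'
    PathToWeightFldr = './Weights/'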
Ejemplo n.º 33
0
def build_cnn(input_var=None):
    """Build the CIFAR-10 model.

    Args:
    images: Images returned from distorted_inputs() or inputs().

    Returns:
    Logits.
    """
    # We instantiate all variables using tf.get_variable() instead of
    # tf.Variable() in order to share variables across multiple GPU training runs.
    # If we only ran this model on a single GPU, we could simplify this function
    # by replacing all instances of tf.get_variable() with tf.Variable().
    #

    input_layer = InputLayer((None, 3, IMAGE_SIZE, IMAGE_SIZE), input_var=input_var)
    
    # conv1
    conv1 = Conv2DLayer(input_layer, num_filters=64, filter_size=(5,5),
                        nonlinearity=lasagne.nonlinearities.rectify,
                        pad='same', W=lasagne.init.HeNormal(),
                        b=lasagne.init.Constant(0.0),
                        name="conv1")
    
    # pool1
    pool1 = MaxPool2DLayer(conv1, pool_size=(3, 3), stride=(2, 2), pad=1)

    # norm1
    norm1 = LocalResponseNormalization2DLayer(pool1, alpha=0.001 / 9.0,
                                              beta=0.75, k=1.0, n=9)
    
    # conv2
    conv2 = Conv2DLayer(norm1, num_filters=64, filter_size=(5,5),
                        nonlinearity=lasagne.nonlinearities.rectify,
                        pad='same', W=lasagne.init.HeNormal(),
                        b=lasagne.init.Constant(0.1),
                        name='conv2')

    # norm2
    norm2 = LocalResponseNormalization2DLayer(conv2, alpha=0.001 / 9.0,
                                              beta=0.75, k=1.0, n=9)
    
    # pool2
    pool2 = MaxPool2DLayer(norm2, pool_size=(3, 3), stride=(2, 2), pad=1)
    
    # fc1
    fc1 = DenseLayer(pool2, num_units=384,
                     nonlinearity=lasagne.nonlinearities.rectify,
                     W=lasagne.init.HeNormal(), b=lasagne.init.Constant(0.1),
                     name="fc1")

    # fc2
    fc2 = DenseLayer(fc1, num_units=192,
                     nonlinearity=lasagne.nonlinearities.rectify,
                     W=lasagne.init.HeNormal(), b=lasagne.init.Constant(0.1),
                     name="fc2")

    # fc3
    softmax_layer = DenseLayer(fc2, num_units=10,
                               nonlinearity=lasagne.nonlinearities.softmax,
                               W=lasagne.init.HeNormal(),
                               b=lasagne.init.Constant(0.0),
                               name="softmax")

    # Weight Decay
    weight_decay_layers = {fc1: 0.002, fc2: 0.002}
    l2_penalty = regularize_layer_params_weighted(weight_decay_layers, l2)

    return softmax_layer, l2_penalty
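The weighted penalty built just above is simply a per-layer scaled sum of squared weights. A small sketch of the equivalent hand-written expression, assuming only the default regularizable parameters (the `W` matrices of the two dense layers) are penalized:

# Equivalent manual form of the weighted L2 penalty returned above (a sketch;
# by default only the W matrices of the DenseLayers carry the regularizable tag).
import theano.tensor as T

manual_l2 = 0.002 * T.sum(T.sqr(fc1.W)) + 0.002 * T.sum(T.sqr(fc2.W))
# manual_l2 is the same scalar expression as l2_penalty above.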
def build_network_MyModel(
    args, input1_var, input1_mask_var, input2_var, input2_mask_var, wordEmbeddings, maxlen=36, reg=0.5 * 1e-4
):

    # need use theano.scan
    print ("Building model LSTM + Featue Model + 2D Convolution +MLP")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]
    GRAD_CLIP = wordDim

    input_1 = InputLayer((None, maxlen), input_var=input1_var)
    batchsize, seqlen = input_1.input_var.shape
    input_1_mask = InputLayer((None, maxlen), input_var=input1_mask_var)
    emb_1 = EmbeddingLayer(input_1, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    emb_1.params[emb_1.W].remove("trainable")
    lstm_1 = LSTMLayer(
        emb_1, num_units=args.lstmDim, mask_input=input_1_mask, grad_clipping=GRAD_CLIP, nonlinearity=tanh
    )

    input_2 = InputLayer((None, maxlen), input_var=input2_var)
    input_2_mask = InputLayer((None, maxlen), input_var=input2_mask_var)
    emb_2 = EmbeddingLayer(input_2, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    emb_2.params[emb_2.W].remove("trainable")
    lstm_2 = LSTMLayer(
        emb_2, num_units=args.lstmDim, mask_input=input_2_mask, grad_clipping=GRAD_CLIP, nonlinearity=tanh
    )

    # print "LSTM shape", get_output_shape(lstm_2) # LSTM shape (None, 36, 150)
    cos_feats = CosineSimLayer([lstm_1, lstm_2])
    print "SSSS", get_output_shape(cos_feats)

    # lstm_1 = SliceLayer(lstm_1, indices=slice(-6, None), axis=1)
    # lstm_2 = SliceLayer(lstm_2, indices=slice(-6, None), axis=1)

    # concat = ConcatLayer([lstm_1, lstm_2],axis=2) #(None, 36, 300)

    """

    num_filters = 32
    stride = 1 
    """
    filter_size = (10, 10)
    pool_size = (4, 4)
    """

    filter_size=(3, 10)
    pool_size=(2,2)

    reshape = ReshapeLayer(concat, (batchsize, 1, 6, 2*args.lstmDim))

    conv2d = Conv2DLayer(reshape, num_filters=num_filters, filter_size=filter_size,
            nonlinearity=rectify,W=GlorotUniform())
    
    maxpool = MaxPool2DLayer(conv2d, pool_size=pool_size) #(None, 32, 6, 72)


    """
    # Another convolution with 32 5x5 kernels, and another 2x2 pooling:
    # conv2d = Conv2DLayer(maxpool, num_filters=32, filter_size=(5, 5), nonlinearity=rectify)
    # maxpool = MaxPool2DLayer(conv2d, pool_size=(2, 2))

    # A fully-connected layer of 256 units with 50% dropout on its inputs:
    # hid = DenseLayer(DropoutLayer(maxpool, p=.2),num_units=128,nonlinearity=rectify)
    hid = DenseLayer(cos_feats, num_units=10, nonlinearity=sigmoid)

    if args.task == "sts":
        network = DenseLayer(hid, num_units=5, nonlinearity=logsoftmax)

    elif args.task == "ent":
        network = DenseLayer(hid, num_units=3, nonlinearity=logsoftmax)

    layers = {lstm_1: reg, hid: reg, network: reg}
    penalty = regularize_layer_params_weighted(layers, l2)

    input_dict = {
        input_1: input1_var,
        input_2: input2_var,
        input_1_mask: input1_mask_var,
        input_2_mask: input2_mask_var,
    }

    return network, penalty, input_dict
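A minimal usage sketch for the tuple returned above. The target variable and the negative-log-likelihood objective are illustrative assumptions; the dict-based `inputs=` call is how Lasagne consumes the returned `input_dict`.

# Hypothetical wiring of the returned network, penalty and input dictionary.
# `targets` and the NLL objective below are illustrative, not from the source.
import theano.tensor as T
import lasagne

network, penalty, input_dict = build_network_MyModel(
    args, input1_var, input1_mask_var, input2_var, input2_mask_var, wordEmbeddings)

log_probs = lasagne.layers.get_output(network, inputs=input_dict)  # log-softmax rows
targets   = T.ivector('targets')
loss = -T.mean(log_probs[T.arange(log_probs.shape[0]), targets]) + penalty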
Ejemplo n.º 35
0
def build_cnn(input_var=None, batch_size = None):

    # Input layer, as usual:
    network = lasagne.layers.InputLayer(shape=(None, 1, 227, 227),
                                        input_var=input_var)

    repeatInput = Repeat(network, 61)

    network = lasagne.layers.ReshapeLayer(repeatInput, (-1, 1, 227, 227))
    
    network_transformed = RotationTransformationLayer(network, batch_size * 61)

    network = Conv2DLayer(
            network_transformed, num_filters=96, filter_size=(11, 11),
            stride=(4,4),
            nonlinearity=lasagne.nonlinearities.rectify,
            W=lasagne.init.GlorotUniform())

    network = LRN(network, alpha = 0.0001, beta = 0.75, n = 5)

    # Max-pooling layer of factor 2 in both dimensions:
    network = MaxPool2DLayer(network, pool_size=(3, 3), stride=(2,2))

    # Another convolution with 32 5x5 kernels, and another 2x2 pooling:
    network = Conv2DLayer(
            network, num_filters=256, filter_size=(5, 5),
            pad = 2,
            nonlinearity=lasagne.nonlinearities.rectify,
            W = lasagne.init.GlorotUniform()
            #nonlinearity=lasagne.nonlinearities.sigmoid
            )
    
    network = LRN(network, alpha = 0.0001, beta = 0.75, n = 5)
    
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(3, 3), stride = (2, 2))
    

    # A fully-connected layer of 256 units with 50% dropout on its inputs:
    fc1  = lasagne.layers.DenseLayer(
            network,
            num_units=256,
            W = lasagne.init.Normal(0.01),
            nonlinearity=lasagne.nonlinearities.rectify,
            )

    # And, finally, the 10-unit output layer with 50% dropout on its inputs:
    fc2  = lasagne.layers.DenseLayer(
            fc1,
            num_units=4096,
            W = lasagne.init.Normal(0.005),
            nonlinearity=lasagne.nonlinearities.rectify,
            )

    fc3  = lasagne.layers.DenseLayer(
            lasagne.layers.dropout(fc2, p=.5),
            num_units=4096,
            W = lasagne.init.Normal(0.005),
            nonlinearity=lasagne.nonlinearities.rectify,
            )

    fc4  = lasagne.layers.DenseLayer(
            lasagne.layers.dropout(fc3, p=.5),
            num_units=61,
            nonlinearity=lasagne.nonlinearities.identity,
            )

    network_transformed = lasagne.layers.ReshapeLayer(network_transformed, (-1, 61, 40, 40))

    fc4_selected = SelectLayer(fc4, 61)
    
    weight_decay_layers = {fc1:0.0, fc2:0.0}
    l2_penalty = regularize_layer_params_weighted(weight_decay_layers, l2)

    return fc4, fc4_selected, l2_penalty, network_transformed
    def __init_model(self):
        """Initializes the model and compiles the network

        For the most part, this consists of setting up some bookkeeping
        for theano and lasagne, and compiling the theano functions
        """
        logging.info('initializing model')
        if self.Xshape is None or self.yshape is None:
            if self.Xshape is None:
                logging.warning("Tried to compile Neural Net before "
                    "setting input dimensionality")
            if self.yshape is None:
                logging.warning("Tried to compile Neural Net before "
                    "setting output dimensionality")
            raise ShapeError(self.Xshape, self.yshape)

        # These are theano/lasagne symbolic variable declarations,
        # representing the target vector (traces)
        target_vector = T.fmatrix('y')
        # our predictions
        predictions = lasagne.layers.get_output(self.layer_out)
        validation_predictions = lasagne.layers.get_output(self.layer_out, deterministic=True)
        # the loss (diff in objective) for training
        # using MSE
        stochastic_loss = lasagne.objectives.squared_error(predictions, target_vector).mean()
        #print(stochastic_loss)
        deterministic_loss = lasagne.objectives.squared_error(validation_predictions, target_vector).mean()
        # using cross entropy
        #stochastic_loss = lasagne.objectives.categorical_crossentropy(predictions, target_vector).mean()
        # the loss for validation
        #deterministic_loss = lasagne.objectives.categorical_crossentropy(test_predictions, target_vector).mean()
        # calculate loss
        loss = stochastic_loss
        # should regularization be used?
        config = self.config
        if config:
            if config.l1_regularization:
                logging.info("Using L1 regularization")
                l1_penalty = regularize_layer_params(self.layer_out, l1) * 1e-4
                loss += l1_penalty
            if config.l2_regularization:
                logging.info("Using L2 regularization with weights")
                for sublayer in self.layer_in:
                    logging.info("\tinput layer ({1}) weight: {0}".format(self.layer_weights[sublayer],sublayer.name))
                logging.info("\toutput layer weight: {0}".format(self.layer_weights[self.layer_out]))
                l2_penalty = regularize_layer_params_weighted(self.layer_weights, l2)
                loss += l2_penalty
        else:
            logging.info("No regularization")
        # the network parameters (i.e. weights)
        all_params = lasagne.layers.get_all_params(
            self.layer_out)
        # how to update the weights
        updates = lasagne.updates.nesterov_momentum(
            loss_or_grads = loss,
            params = all_params,
            learning_rate = 0.1,
            momentum = 0.9)

        # The theano functions for training, validating, and tracing.
        #   These get method-level wrappers below
        logging.info('compiling theano functions')
        self._train_fn = theano.function(
            on_unused_input='warn',
            inputs  = [l.input_var for l in self.layer_in]+[target_vector],
            outputs = [stochastic_loss],
            updates = updates)
        self._valid_fn = theano.function(
            on_unused_input='warn',
            inputs  = [l.input_var for l in self.layer_in]+[target_vector],
            outputs = [deterministic_loss,
                validation_predictions])
        self._trace_fn = theano.function(
            on_unused_input='warn',
            inputs  = [l.input_var for l in self.layer_in],
            outputs = [validation_predictions
                * self.roi.shape[0] + self.roi.offset[0]])
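A minimal sketch of driving the compiled functions above (the `model`, `X_batches`, and `y_batch` names are illustrative; `X_batches` is one NumPy array per input layer, in the same order as `self.layer_in`):

# Hypothetical single training / validation / tracing step; all names are illustrative.
train_loss, = model._train_fn(*(X_batches + [y_batch]))
valid_loss, valid_predictions = model._valid_fn(*(X_batches + [y_batch]))
traced_predictions, = model._trace_fn(*X_batches)   # predictions mapped back into ROI coordinates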
Ejemplo n.º 37
0
def build_cnn(input_var=None):
    """Build the CIFAR-10 model.

    Args:
    images: Images returned from distorted_inputs() or inputs().

    Returns:
    Logits.
    """
    # We instantiate all variables using tf.get_variable() instead of
    # tf.Variable() in order to share variables across multiple GPU training runs.
    # If we only ran this model on a single GPU, we could simplify this function
    # by replacing all instances of tf.get_variable() with tf.Variable().
    #

    input_layer = InputLayer((None, 3, IMAGE_SIZE, IMAGE_SIZE), input_var=input_var)

    norm0 = BatchNormLayer(input_layer)
    
    # conv1
    conv1 = Conv2DLayer(norm0, num_filters=64, filter_size=(3,3),
                        nonlinearity=lasagne.nonlinearities.rectify,
                        pad='same', W=lasagne.init.HeNormal(),
                        b=lasagne.init.Constant(0.0),
                        name="conv1")
    
    conv1a = Conv2DLayer(conv1, num_filters=64, filter_size=(3,3),
                        nonlinearity=lasagne.nonlinearities.rectify,
                        pad='same', W=lasagne.init.HeNormal(),
                        b=lasagne.init.Constant(0.0),
                        name="conv1a")
    

    pool1 = MaxPool2DLayer(conv1a, pool_size=(2, 2), stride=(2, 2), pad=0)

    norm1 = BatchNormLayer(pool1) 
    # pool1

    
    # conv2
    conv2 = Conv2DLayer(lasagne.layers.dropout(norm1, p = 0.5), 
                        num_filters=128, filter_size=(3,3),
                        nonlinearity=lasagne.nonlinearities.rectify,
                        pad='same', W=lasagne.init.HeNormal(),
                        b=lasagne.init.Constant(0.1),
                        name='conv2')

    conv2a = Conv2DLayer(conv2, 
                        num_filters=128, filter_size=(3,3),
                        nonlinearity=lasagne.nonlinearities.rectify,
                        pad='same', W=lasagne.init.HeNormal(),
                        b=lasagne.init.Constant(0.1),
                        name='conv2a')
    
    pool2 = MaxPool2DLayer(conv2a, pool_size=(2, 2), stride=(2, 2), pad=0)

    # norm2
    norm2 = BatchNormLayer(pool2)
    
    # pool2
    

    conv3 = Conv2DLayer(lasagne.layers.dropout(norm2, p = 0.5), 
                        num_filters=256, filter_size=(3,3),
                        nonlinearity=lasagne.nonlinearities.rectify,
                        pad='same', W=lasagne.init.HeNormal(),
                        b=lasagne.init.Constant(0.1),
                        name='conv3')
    
    pool3 = MaxPool2DLayer(conv3, pool_size=(2, 2), stride=(2, 2), pad=0)
    
    norm3 = BatchNormLayer(pool3)
    
    
    # fc1
    fc1 = DenseLayer(lasagne.layers.dropout(norm3, p = 0.5), 
                     num_units=256,
                     nonlinearity=lasagne.nonlinearities.rectify,
                     W=lasagne.init.HeNormal(), b=lasagne.init.Constant(0.1),
                     name="fc1")


    # fc3
    softmax_layer = DenseLayer(lasagne.layers.dropout(fc1, p = 0.5),
                               num_units=10,
                               nonlinearity=lasagne.nonlinearities.softmax,
                               W=lasagne.init.HeNormal(),
                               b=lasagne.init.Constant(0.0),
                               name="softmax")

    # Weight Decay
    weight_decay_layers = {fc1: 0.0}
    l2_penalty = regularize_layer_params_weighted(weight_decay_layers, l2)
    network_middle_output = lasagne.layers.ReshapeLayer(softmax_layer, shape = (([0], 10)))

    return softmax_layer, network_middle_output, l2_penalty
def build_cnn(input_var=None, support_var = None, batch_size = None):

    # Input layer, as usual:
    network = lasagne.layers.InputLayer(shape=(batch_size, 1, 40, 40),
                                        input_var=input_var)
    
    support_input = lasagne.layers.InputLayer(shape=(batch_size, 10, 40, 40),
                                              input_var=support_var)

    repeatInput = Repeat(network, 10)

    network = lasagne.layers.ReshapeLayer(repeatInput, (-1, 1, 40, 40))

    network_transformed_TPS = TPSTransformationMatrixLayer(network, batch_size * 10)
    
    network_transformed_TPS_reshape = lasagne.layers.ReshapeLayer(network_transformed_TPS, (-1, 10, 40, 40))

    after_support_layer = lasagne.layers.ElemwiseMergeLayer([network_transformed_TPS_reshape, support_input], T.mul)

    after_support_layer = lasagne.layers.ReshapeLayer(after_support_layer, (-1 , 1, 40, 40))

    network = Conv2DLayer(
            after_support_layer, num_filters=32, filter_size=(5, 5),
            nonlinearity=lasagne.nonlinearities.rectify,
            W=lasagne.init.GlorotUniform())

    # Max-pooling layer of factor 2 in both dimensions:
    network = MaxPool2DLayer(network, pool_size=(2, 2))

    # Another convolution with 32 5x5 kernels, and another 2x2 pooling:
    network = Conv2DLayer(
            network, num_filters=32, filter_size=(5, 5),
            nonlinearity=lasagne.nonlinearities.rectify,
            W = lasagne.init.GlorotUniform()
            )
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))


    # A fully-connected layer of 256 units with 50% dropout on its inputs:
    fc1  = lasagne.layers.DenseLayer(
            lasagne.layers.dropout(network, p=.5),
            #network,
            num_units=256,
            nonlinearity=lasagne.nonlinearities.rectify,
            )

    # And, finally, the 10-unit output layer with 50% dropout on its inputs:
    fc2  = lasagne.layers.DenseLayer(
            lasagne.layers.dropout(fc1, p=.5),
            nonlinearity=lasagne.nonlinearities.identity,
            num_units=10,
            )

    network_transformed = lasagne.layers.ReshapeLayer(after_support_layer, (-1, 10, 40, 40))

    fc2_selected = SelectLayer(fc2, 10)

    weight_decay_layers = {network_transformed_TPS:0.1}
    l2_penalty = regularize_layer_params_weighted(weight_decay_layers, l2)

    return fc2, fc2_selected, l2_penalty, network_transformed, network