Code example #1
def build_cnn():
    data_size = (None, 10, 100)  # batch size x words x characters

    input_var = T.tensor3(name="input", dtype='int64')

    values = np.array(np.random.randint(0, 1, (5, 10, 100)))
    input_var.tag.test_value = values
    input_layer = L.InputLayer(data_size, input_var=input_var)

    W = create_char_embedding_matrix()

    embed_layer = L.EmbeddingLayer(input_layer,
                                   input_size=102,
                                   output_size=101,
                                   W=W)

    reshape = L.reshape(embed_layer, (-1, 100, 101))
    dim_shuffle = L.dimshuffle(reshape, (0, 2, 1))
    #conv_layer_1 = L.Conv2DLayer(embed_layer, 4, (1), 1, 0)
    #pool_layer_1 = L.MaxPool1DLayer(conv_layer_1, pool_size=1)
    print L.get_output(dim_shuffle).tag.test_value.shape

    conv_layer_1 = L.Conv1DLayer(dim_shuffle, 50, 2, 1)

    print L.get_output(conv_layer_1).tag.test_value.shape
    print "TEST"
    pool_layer_1 = L.MaxPool1DLayer(conv_layer_1, pool_size=99)
    print L.get_output(pool_layer_1).tag.test_value.shape
    reshape_conv_1 = L.reshape(pool_layer_1, (-1, 50))

    conv_layer_2 = L.Conv1DLayer(dim_shuffle, 50, 3, 1)
    pool_layer_2 = L.MaxPool1DLayer(conv_layer_2, pool_size=98)
    reshape_conv_2 = L.reshape(pool_layer_2, (-1, 50))

    merge_layer = L.ConcatLayer([reshape_conv_1, reshape_conv_2], 1)
    print L.get_output(merge_layer).tag.test_value.shape
    reshape_output = L.reshape(merge_layer, (-1, 10, 100))
    print L.get_output(reshape_output).tag.test_value.shape

    x = T.tensor3(name="testname", dtype='int32')
    #x = T.imatrix()
    #output = L.get_output(conv_layer_1,x)

    #f = theano.function([x],output)

    word = unicode("Tat")
    word_index = np.array([])

    #print word_index

    #x_test = np.array([word_index]).astype('int32')
    #print f(x_test)

    return reshape_output
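
A quick way to sanity-check the shapes in a pipeline like this, without Theano test values, is lasagne.layers.get_output_shape. The following is a standalone sketch (not part of the original example) that rebuilds the reshape -> dimshuffle -> Conv1DLayer -> MaxPool1DLayer chain with the same sizes and prints the statically inferred shape:

import theano.tensor as T
import lasagne.layers as L

# Sketch only: same sizes as above (10 words x 100 chars, 102-symbol alphabet,
# 101-dim embeddings), shapes checked statically instead of via test values.
input_var = T.itensor3("input")                      # batch x words x chars
input_layer = L.InputLayer((None, 10, 100), input_var=input_var)
embed = L.EmbeddingLayer(input_layer, input_size=102, output_size=101)
reshaped = L.reshape(embed, (-1, 100, 101))          # (batch*words, chars, emb)
shuffled = L.dimshuffle(reshaped, (0, 2, 1))         # (batch*words, emb, chars)
conv = L.Conv1DLayer(shuffled, num_filters=50, filter_size=2)   # length 100 - 2 + 1 = 99
pool = L.MaxPool1DLayer(conv, pool_size=99)          # max over time -> length 1
print(L.get_output_shape(pool))                      # (None, 50, 1)
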
Code example #2
def build_cnn(input):
    #data_size = (None,103,130)  # Batch size x Img Channels x Height x Width

    #input_var = T.tensor3(name = "input",dtype='int64')
    input_var = input

    #values = np.array(np.random.randint(0,102,(1,9,50)))

    #input_var.tag.test_value = values
    #number sentences x words x characters
    input_layer = L.InputLayer((None,9,50), input_var=input)

    W = create_char_embedding_matrix()

    embed_layer = L.EmbeddingLayer(input_layer, input_size=103,output_size=101, W=W)
    #print "EMBED", L.get_output(embed_layer).tag.test_value.shape
    reshape_embed = L.reshape(embed_layer,(-1,50,101))
    #print "reshap embed", L.get_output(reshape_embed).tag.test_value.shape
    conv_layer_1 = L.Conv1DLayer(reshape_embed, 55, 2)
    conv_layer_2 = L.Conv1DLayer(reshape_embed, 55, 3)
    #print "TEST"
    #print "Convolution Layer 1", L.get_output(conv_layer_1).tag.test_value.shape
    #print "Convolution Layer 2", L.get_output(conv_layer_2).tag.test_value.shape

    #flatten_conv_1 = L.flatten(conv_layer_1,3)
    #flatten_conv_2 = L.flatten(conv_layer_2,3)

    #reshape_max_1 = L.reshape(flatten_conv_1,(-1,49))
    #reshape_max_2 = L.reshape(flatten_conv_2, (-1,48))

    #print "OUTPUT Flatten1", L.get_output(flatten_conv_1).tag.test_value.shape
    #print "OUTPUT Flatten2", L.get_output(flatten_conv_2).tag.test_value.shape

    #print "OUTPUT reshape_max_1", L.get_output(reshape_max_1).tag.test_value.shape
    #print "OUTPUT reshape_max_2", L.get_output(reshape_max_2).tag.test_value.shape

    pool_layer_1 = L.MaxPool1DLayer(conv_layer_1, pool_size=54)
    pool_layer_2 = L.MaxPool1DLayer(conv_layer_2, pool_size=53)


    #print "OUTPUT POOL1", L.get_output(pool_layer_1).tag.test_value.shape
    #print "OUTPUT POOL2",L.get_output(pool_layer_2).tag.test_value.shape

    merge_layer = L.ConcatLayer([pool_layer_1, pool_layer_2], 1)

    flatten_merge = L.flatten(merge_layer, 2)
    reshape_merge = L.reshape(flatten_merge, (1,9,110))
    print L.get_output(reshape_embed).shape
    #print L.get_output(reshape_merge).tag.test_value.shape

    return reshape_merge, char_index_lookup
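
For illustration, here is how a character-CNN block like build_cnn is typically compiled and evaluated with Theano. This is a standalone sketch, not the project's code: the sizes (9 words of 50 characters, a 103-symbol alphabet) mirror the example, everything else is made up, and only one convolution branch is kept.

import numpy as np
import theano
import theano.tensor as T
import lasagne.layers as L

chars = T.itensor3("chars")                                   # batch x 9 x 50
l_in = L.InputLayer((None, 9, 50), input_var=chars)
l_emb = L.EmbeddingLayer(l_in, input_size=103, output_size=101)
l_resh = L.reshape(l_emb, (-1, 50, 101))
l_conv = L.Conv1DLayer(l_resh, num_filters=55, filter_size=2)
l_pool = L.MaxPool1DLayer(l_conv, pool_size=54)               # pool over the conv output
l_out = L.reshape(L.flatten(l_pool, 2), (-1, 9, 55))          # back to sentences x words

predict = theano.function([chars], L.get_output(l_out))       # compile the graph
batch = np.random.randint(0, 103, (2, 9, 50)).astype("int32")
print(predict(batch).shape)                                   # (2, 9, 55)
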
Code example #3
    def get_context(self, conv_in, avg=False):
        suf = '_avg' if avg else ''

        conv_out = []
        # for n in [2,3,4,5,6,7,8,9]:
        # for n in [2,3,4,5]:
        for n in self.args.context_ngrams:
            conv = conv_in
            for i in range(self.args.conv_layers):
                conv = L.Conv1DLayer(
                    conv,
                    128,
                    n,
                    name='conv_window_%d(%d)%s' % (n, i, suf),
                    # W=HeNormal('relu') if not avg else Constant()) # (100, 128, 15-n+1)
                    W=GlorotNormal('relu') if not avg else Constant())  # (100, 128, 15-n+1)

            conv = L.MaxPool1DLayer(
                conv, self.args.window_size -
                (n - 1) * self.args.conv_layers)  # (100, 128, 1)
            conv = L.flatten(conv, 2)  # (100, 128)
            conv_out.append(conv)

        x = L.concat(conv_out, axis=1)  # (100, 1024)

        return x
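
The pool size in this example is not arbitrary: each valid 1-D convolution with window n removes n - 1 positions, so after conv_layers stacked convolutions a sequence of length window_size has window_size - (n - 1) * conv_layers positions left, and pooling with exactly that size is a max over time, leaving one value per filter. A throwaway check of that arithmetic with made-up sizes (not the class's real configuration):

import lasagne.layers as L

window_size, conv_layers, n = 15, 2, 3
l = L.InputLayer((None, 100, window_size))           # batch x channels x time
for i in range(conv_layers):
    l = L.Conv1DLayer(l, num_filters=128, filter_size=n)
l = L.MaxPool1DLayer(l, window_size - (n - 1) * conv_layers)
print(L.get_output_shape(l))                         # (None, 128, 1)
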
Code example #4
File: models.py  Project: yunjiewang/SoloLa
    def set_conv_layer(self, network, layer_name, dropout=True, pad=0, bnorm=False):
        opts = self.net_opts[layer_name]
        ll = layers.Conv1DLayer(
                layers.dropout(network, p=self.net_opts['dropout_p']) if dropout else network,
                num_filters=opts['num_filters'],
                filter_size=opts['filter_size'],
                stride=opts['stride'],
                pad=pad,
                name=layer_name)
        return layers.batch_norm(ll) if bnorm else ll
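
A standalone sketch of the same dropout -> Conv1DLayer -> batch_norm chain, with invented filter options standing in for the project's net_opts:

import lasagne.layers as layers

# Illustrative values only; the real settings live in self.net_opts.
opts = {'num_filters': 64, 'filter_size': 5, 'stride': 1}
dropout_p = 0.25

l_in = layers.InputLayer((None, 40, 200))            # batch x channels x time
l_conv = layers.Conv1DLayer(layers.dropout(l_in, p=dropout_p),
                            num_filters=opts['num_filters'],
                            filter_size=opts['filter_size'],
                            stride=opts['stride'],
                            pad=0,
                            name='conv1')
l_conv = layers.batch_norm(l_conv)                   # the bnorm=True branch
print(layers.get_output_shape(l_conv))               # (None, 64, 196)
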
Code example #5
def build_network(W,
                  number_unique_tags,
                  longest_word,
                  longest_sentence,
                  input_var=None):
    print("Building network ...")

    input_layer = L.InputLayer((None, longest_sentence, longest_word),
                               input_var=input_var)

    embed_layer = L.EmbeddingLayer(input_layer,
                                   input_size=103,
                                   output_size=101,
                                   W=W)

    reshape_embed = L.reshape(embed_layer, (-1, longest_word, 101))

    conv_layer_1 = L.Conv1DLayer(reshape_embed, longest_word, 2)
    conv_layer_2 = L.Conv1DLayer(reshape_embed, longest_word, 3)

    pool_layer_1 = L.MaxPool1DLayer(conv_layer_1, pool_size=longest_word - 1)
    pool_layer_2 = L.MaxPool1DLayer(conv_layer_2, pool_size=longest_word - 2)

    merge_layer = L.ConcatLayer([pool_layer_1, pool_layer_2], 1)
    flatten_merge = L.flatten(merge_layer, 2)
    reshape_merge = L.reshape(flatten_merge,
                              (-1, longest_sentence, int(longest_word * 2)))

    l_re = lasagne.layers.RecurrentLayer(
        reshape_merge,
        N_HIDDEN,
        nonlinearity=lasagne.nonlinearities.sigmoid,
        mask_input=None)
    l_out = lasagne.layers.DenseLayer(
        l_re, number_unique_tags, nonlinearity=lasagne.nonlinearities.softmax)

    print "DONE BUILDING NETWORK"
    return l_out
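
A sketch of how a network like this is usually wired up for training in Lasagne. The sizes, target variable, and optimiser settings below are invented, and the call assumes this module's N_HIDDEN and imports are in place; it is not the project's training code.

import numpy as np
import theano
import theano.tensor as T
import lasagne

input_var = T.itensor3('chars')                       # batch x sentence x word
W_emb = np.random.normal(0, 0.1, (103, 101)).astype(theano.config.floatX)
network = build_network(W_emb, number_unique_tags=17, longest_word=60,
                        longest_sentence=30, input_var=input_var)

target_var = T.ivector('targets')                     # one class index per example (illustrative)
prediction = lasagne.layers.get_output(network)
loss = lasagne.objectives.categorical_crossentropy(prediction, target_var).mean()
params = lasagne.layers.get_all_params(network, trainable=True)
updates = lasagne.updates.adam(loss, params, learning_rate=1e-3)
train_fn = theano.function([input_var, target_var], loss, updates=updates)
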
Code example #6
    def _build_net(self, emb_char_filter_size=5, emb_dropout=True, **kwargs):

        batch_size = self.mask_context_var.shape[0]
        context_len = self.mask_context_var.shape[1]
        question_len = self.question_var.shape[1]
        context_word_len = self.context_char_var.shape[2]
        question_word_len = self.question_char_var.shape[2]

        self.batch_size = batch_size
        self.context_len = context_len
        ''' Inputs and word embeddings'''

        l_context_char = LL.InputLayer(shape=(None, None, None),
                                       input_var=self.context_char_var)
        l_question_char = LL.InputLayer(shape=(None, None, None),
                                        input_var=self.question_char_var)

        l_c_mask = LL.InputLayer(shape=(None, None),
                                 input_var=self.mask_context_var)
        l_q_mask = LL.InputLayer(shape=(None, None),
                                 input_var=self.mask_question_var)

        l_c_char_mask = LL.InputLayer(shape=(None, None, None),
                                      input_var=self.mask_context_char_var)
        l_q_char_mask = LL.InputLayer(shape=(None, None, None),
                                      input_var=self.mask_question_char_var)

        l_c_emb = LL.InputLayer(shape=(None, None, self.emb_size),
                                input_var=self.context_var)
        l_q_emb = LL.InputLayer(shape=(None, None, self.emb_size),
                                input_var=self.question_var)

        if self.train_unk:
            l_c_unk_mask = LL.InputLayer(shape=(None, None),
                                         input_var=self.mask_context_unk_var)
            l_q_unk_mask = LL.InputLayer(shape=(None, None),
                                         input_var=self.mask_question_unk_var)

            l_c_emb = TrainUnkLayer(l_c_emb,
                                    l_c_unk_mask,
                                    output_size=self.emb_size,
                                    W=self.word_embeddings[0])

            l_q_emb = TrainUnkLayer(l_q_emb,
                                    l_q_unk_mask,
                                    output_size=self.emb_size,
                                    W=l_c_emb.W)

        if self.negative:
            l_c_emb = TrainNAWLayer(l_c_emb,
                                    l_c_mask,
                                    output_size=self.emb_size)
        ''' Char-embeddings '''

        # (batch_size x context_len x context_word_len x emb_char_size)
        l_c_char_emb = LL.EmbeddingLayer(l_context_char,
                                         input_size=self.alphabet_size,
                                         output_size=self.emb_char_size)

        l_q_char_emb = LL.EmbeddingLayer(l_question_char,
                                         input_size=self.alphabet_size,
                                         output_size=self.emb_char_size,
                                         W=l_c_char_emb.W)

        # here I do multiplication of character embeddings with masks,
        # because I want to pad them with constant zeros

        l_c_char_mask = ForgetSizeLayer(
            LL.dimshuffle(l_c_char_mask, (0, 1, 2, 'x')))
        l_q_char_mask = ForgetSizeLayer(
            LL.dimshuffle(l_q_char_mask, (0, 1, 2, 'x')))

        l_c_char_emb = LL.ElemwiseMergeLayer([l_c_char_emb, l_c_char_mask],
                                             T.mul)
        l_q_char_emb = LL.ElemwiseMergeLayer([l_q_char_emb, l_q_char_mask],
                                             T.mul)

        # convolutions

        l_c_char_emb = LL.dimshuffle(
            LL.reshape(l_c_char_emb, (batch_size * context_len,
                                      context_word_len, self.emb_char_size)),
            (0, 2, 1))
        l_c_char_conv = LL.Conv1DLayer(l_c_char_emb,
                                       num_filters=self.num_emb_char_filters,
                                       filter_size=emb_char_filter_size,
                                       nonlinearity=L.nonlinearities.tanh,
                                       pad=self.conv)
        # (batch_size * context_len x num_filters x context_word_len + filter_size - 1)

        l_c_char_emb = LL.ExpressionLayer(l_c_char_conv,
                                          lambda X: X.max(2),
                                          output_shape='auto')
        l_c_char_emb = LL.reshape(
            l_c_char_emb, (batch_size, context_len, self.num_emb_char_filters))

        l_q_char_emb = LL.dimshuffle(
            LL.reshape(l_q_char_emb, (batch_size * question_len,
                                      question_word_len, self.emb_char_size)),
            (0, 2, 1))
        l_q_char_conv = LL.Conv1DLayer(l_q_char_emb,
                                       num_filters=self.num_emb_char_filters,
                                       filter_size=emb_char_filter_size,
                                       nonlinearity=L.nonlinearities.tanh,
                                       W=l_c_char_conv.W,
                                       b=l_c_char_conv.b,
                                       pad=self.conv)
        # (batch_size * question_len x num_filters x question_word_len + filter_size - 1)

        l_q_char_emb = LL.ExpressionLayer(l_q_char_conv,
                                          lambda X: X.max(2),
                                          output_shape='auto')
        l_q_char_emb = LL.reshape(
            l_q_char_emb,
            (batch_size, question_len, self.num_emb_char_filters))
        ''' Concatenating both embeddings '''

        l_c_emb = LL.concat([l_c_emb, l_c_char_emb], axis=2)
        l_q_emb = LL.concat([l_q_emb, l_q_char_emb], axis=2)

        # originally I had dropout here
        ''' Highway layer allowing for interaction between embeddings '''

        l_c_P = LL.reshape(l_c_emb,
                           (batch_size * context_len,
                            self.emb_size + self.num_emb_char_filters))
        l_c_P = LL.DenseLayer(l_c_P,
                              num_units=self.rec_size,
                              b=None,
                              nonlinearity=None)

        l_c_high = HighwayLayer(l_c_P)
        l_c_emb = LL.reshape(l_c_high,
                             (batch_size, context_len, self.rec_size))

        l_q_P = LL.reshape(l_q_emb,
                           (batch_size * question_len,
                            self.emb_size + self.num_emb_char_filters))
        l_q_P = LL.DenseLayer(l_q_P,
                              num_units=self.rec_size,
                              W=l_c_P.W,
                              b=None,
                              nonlinearity=None)

        l_q_high = HighwayLayer(l_q_P,
                                W1=l_c_high.W1,
                                b1=l_c_high.b1,
                                W2=l_c_high.W2,
                                b2=l_c_high.b2)
        l_q_emb = LL.reshape(l_q_high,
                             (batch_size, question_len, self.rec_size))
        ''' Calculating wiq features from https://arxiv.org/abs/1703.04816 '''

        l_weighted_feat = WeightedFeatureLayer(
            [l_c_emb, l_q_emb, l_c_mask, l_q_mask])  # batch_size x context_len
        l_weighted_feat = LL.dimshuffle(l_weighted_feat, (0, 1, 'x'))

        # batch_size x context_len
        l_bin_feat = LL.InputLayer(shape=(None, None),
                                   input_var=self.bin_feat_var)
        l_bin_feat = LL.dimshuffle(l_bin_feat, (0, 1, 'x'))
        ''' Dropout at the embeddings '''

        if emb_dropout:
            print('Using dropout after wiq calculation.')
            l_c_emb = LL.dropout(l_c_emb)
            l_q_emb = LL.dropout(l_q_emb)
        ''' Here we concatenate wiq features to embeddings'''

        # both features are concatenated to the embeddings
        # for the question we fix the features to 1
        l_c_emb = LL.concat([l_c_emb, l_bin_feat, l_weighted_feat], axis=2)
        l_q_emb = LL.pad(l_q_emb,
                         width=[(0, 2)],
                         val=L.utils.floatX(1),
                         batch_ndim=2)
        ''' Context and question encoding using the same BiLSTM for both '''

        # output shape is (batch_size x context_len x rec_size)
        l_c_enc_forw = LL.LSTMLayer(l_c_emb,
                                    num_units=self.rec_size,
                                    grad_clipping=100,
                                    mask_input=l_c_mask)

        l_c_enc_back = LL.LSTMLayer(l_c_emb,
                                    num_units=self.rec_size,
                                    grad_clipping=100,
                                    mask_input=l_c_mask,
                                    backwards=True)

        # output shape is (batch_size x question_len x rec_size)
        l_q_enc_forw = LL.LSTMLayer(
            l_q_emb,
            num_units=self.rec_size,
            grad_clipping=100,
            mask_input=l_q_mask,
            ingate=LL.Gate(W_in=l_c_enc_forw.W_in_to_ingate,
                           W_hid=l_c_enc_forw.W_hid_to_ingate,
                           W_cell=l_c_enc_forw.W_cell_to_ingate,
                           b=l_c_enc_forw.b_ingate),
            forgetgate=LL.Gate(W_in=l_c_enc_forw.W_in_to_forgetgate,
                               W_hid=l_c_enc_forw.W_hid_to_forgetgate,
                               W_cell=l_c_enc_forw.W_cell_to_forgetgate,
                               b=l_c_enc_forw.b_forgetgate),
            outgate=LL.Gate(W_in=l_c_enc_forw.W_in_to_outgate,
                            W_hid=l_c_enc_forw.W_hid_to_outgate,
                            W_cell=l_c_enc_forw.W_cell_to_outgate,
                            b=l_c_enc_forw.b_outgate),
            cell=LL.Gate(W_in=l_c_enc_forw.W_in_to_cell,
                         W_hid=l_c_enc_forw.W_hid_to_cell,
                         W_cell=None,
                         b=l_c_enc_forw.b_cell,
                         nonlinearity=L.nonlinearities.tanh))

        l_q_enc_back = LL.LSTMLayer(
            l_q_emb,
            num_units=self.rec_size,
            grad_clipping=100,
            mask_input=l_q_mask,
            backwards=True,
            ingate=LL.Gate(W_in=l_c_enc_back.W_in_to_ingate,
                           W_hid=l_c_enc_back.W_hid_to_ingate,
                           W_cell=l_c_enc_back.W_cell_to_ingate,
                           b=l_c_enc_back.b_ingate),
            forgetgate=LL.Gate(W_in=l_c_enc_back.W_in_to_forgetgate,
                               W_hid=l_c_enc_back.W_hid_to_forgetgate,
                               W_cell=l_c_enc_back.W_cell_to_forgetgate,
                               b=l_c_enc_back.b_forgetgate),
            outgate=LL.Gate(W_in=l_c_enc_back.W_in_to_outgate,
                            W_hid=l_c_enc_back.W_hid_to_outgate,
                            W_cell=l_c_enc_back.W_cell_to_outgate,
                            b=l_c_enc_back.b_outgate),
            cell=LL.Gate(W_in=l_c_enc_back.W_in_to_cell,
                         W_hid=l_c_enc_back.W_hid_to_cell,
                         W_cell=None,
                         b=l_c_enc_back.b_cell,
                         nonlinearity=L.nonlinearities.tanh))

        # batch_size x context_len  x 2*rec_size
        l_c_enc = LL.concat([l_c_enc_forw, l_c_enc_back], axis=2)
        # batch_size x question_len x 2*rec_size
        l_q_enc = LL.concat([l_q_enc_forw, l_q_enc_back], axis=2)

        def proj_init():
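            # Stacks two identity matrices, so the projection layers below start
            # out as (a tanh of) the sum of the forward and backward encoder states.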
            return np.vstack([
                np.eye(self.rec_size, dtype=theano.config.floatX),
                np.eye(self.rec_size, dtype=theano.config.floatX)
            ])

        # this is H from the paper, shape: (batch_size * context_len x
        # rec_size)
        l_c_proj = LL.reshape(l_c_enc,
                              (batch_size * context_len, 2 * self.rec_size))
        l_c_proj = LL.DenseLayer(l_c_proj,
                                 num_units=self.rec_size,
                                 W=proj_init(),
                                 b=None,
                                 nonlinearity=L.nonlinearities.tanh)

        # this is Z from the paper, shape: (batch_size * question_len x
        # rec_size)
        l_q_proj = LL.reshape(l_q_enc,
                              (batch_size * question_len, 2 * self.rec_size))
        l_q_proj = LL.DenseLayer(l_q_proj,
                                 num_units=self.rec_size,
                                 W=proj_init(),
                                 b=None,
                                 nonlinearity=L.nonlinearities.tanh)
        ''' Additional, weighted question encoding (alphas from paper) '''

        l_alpha = LL.DenseLayer(
            l_q_proj,  # batch_size * question_len x 1
            num_units=1,
            b=None,
            nonlinearity=None)

        # batch_size x question_len
        l_alpha = MaskedSoftmaxLayer(
            LL.reshape(l_alpha, (batch_size, question_len)), l_q_mask)

        # batch_size x rec_size
        l_z_hat = BatchedDotLayer([
            LL.reshape(l_q_proj, (batch_size, question_len, self.rec_size)),
            l_alpha
        ])

        return l_c_proj, l_z_hat
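
The char-embedding block in the middle of this example (reshape to one word per row, dimshuffle to channels-first, Conv1DLayer, max over character positions via ExpressionLayer, reshape back) is a reusable pattern. A stripped-down sketch with invented sizes, not the class's actual configuration:

import theano.tensor as T
import lasagne
import lasagne.layers as LL

# Made-up sizes: 26-symbol alphabet, 16-dim char embeddings, 32 filters of width 5.
char_var = T.itensor3('chars')                        # batch x seq_len x word_len
l_chars = LL.InputLayer((None, None, None), input_var=char_var)
l_emb = LL.EmbeddingLayer(l_chars, input_size=26, output_size=16)

batch_size = char_var.shape[0]
seq_len = char_var.shape[1]
word_len = char_var.shape[2]

# one word per row, channels first: (batch*seq_len, 16, word_len)
l_emb = LL.dimshuffle(LL.reshape(l_emb, (batch_size * seq_len, word_len, 16)),
                      (0, 2, 1))
l_conv = LL.Conv1DLayer(l_emb, num_filters=32, filter_size=5,
                        nonlinearity=lasagne.nonlinearities.tanh, pad='full')
# max over character positions, then back to (batch, seq_len, 32) word vectors
l_word = LL.ExpressionLayer(l_conv, lambda X: X.max(2), output_shape='auto')
l_word = LL.reshape(l_word, (batch_size, seq_len, 32))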