Example #1
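# Three model constructors extracted from one module; they share the imports
# below. The helper layers lasagne_average_layer, lasagne_gran_layer, and
# lasagne_add_layer are project-local and assumed available; the class names
# used here are placeholders inferred from the code, not the originals.
import numpy as np
import theano
import theano.tensor as T
from theano import config
import lasagne
# Hypothetical import path for the project-local custom layers:
# from custom_layers import (lasagne_average_layer, lasagne_gran_layer,
#                            lasagne_add_layer)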
class ClassificationModel(object):

    def __init__(self, We_initial, params):

        We = theano.shared(np.asarray(We_initial, dtype=config.floatX))

        gidx = T.imatrix()
        gmask = T.matrix()
        scores = T.matrix()

        l_in = lasagne.layers.InputLayer((None, None))
        l_mask = lasagne.layers.InputLayer(shape=(None, None))
        l_emb = lasagne.layers.EmbeddingLayer(
            l_in,
            input_size=We.get_value().shape[0],
            output_size=We.get_value().shape[1],
            W=We)

        if params.model == "wordaverage":
            l_out = lasagne_average_layer([l_emb, l_mask], tosum=False)

        elif params.model == "lstm":
            l_lstm = lasagne.layers.LSTMLayer(l_emb,
                                              params.dim,
                                              peepholes=True,
                                              learn_init=False,
                                              mask_input=l_mask)
            l_out = lasagne.layers.SliceLayer(l_lstm, -1, 1)

        l_softmax = lasagne.layers.DenseLayer(l_out,
                                              2,
                                              nonlinearity=T.nnet.softmax)
        X = lasagne.layers.get_output(l_softmax, {l_in: gidx, l_mask: gmask})
        cost = T.nnet.categorical_crossentropy(X, scores)

        # Collect trainable parameters from the full network (including the
        # softmax layer), then drop the embedding matrix (index 0) so it is
        # excluded from L2 regularization while still receiving updates.
        network_params = lasagne.layers.get_all_params(l_softmax,
                                                       trainable=True)
        network_params.pop(0)

        self.all_params = lasagne.layers.get_all_params(l_softmax,
                                                        trainable=True)
        self.final_layer = l_softmax
        print(self.all_params)

        l2 = 0.5 * params.LC * sum(
            lasagne.regularization.l2(x) for x in network_params)
        cost = T.mean(cost) + l2

        grads = theano.gradient.grad(cost, self.all_params)
        updates = params.learner(grads, self.all_params, params.eta)

        self.train_function = theano.function([gidx, gmask, scores],
                                              cost,
                                              updates=updates)
        self.predict_function = theano.function([gidx, gmask], X)

        print "Num Params:", lasagne.layers.count_params(self.final_layer)

class GranModel(object):

    def __init__(self, We_initial, params):

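        # Two copies of the embeddings: `We` is trained, while `initial_We`
        # stays fixed and serves as the target of the word_reg penalty below.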
        initial_We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
        We = theano.shared(np.asarray(We_initial, dtype=config.floatX))

        g1 = T.imatrix()
        g2 = T.imatrix()
        p1 = T.imatrix()
        p2 = T.imatrix()

        g1mask = T.matrix()
        g2mask = T.matrix()
        p1mask = T.matrix()
        p2mask = T.matrix()

        l_in = lasagne.layers.InputLayer((None, None))
        l_mask = lasagne.layers.InputLayer(shape=(None, None))
        l_emb = lasagne.layers.EmbeddingLayer(
            l_in,
            input_size=We.get_value().shape[0],
            output_size=We.get_value().shape[1],
            W=We)

        if params.model == "gran":
            l_lstm = lasagne_gran_layer(l_emb,
                                        300,
                                        peepholes=True,
                                        learn_init=False,
                                        mask_input=l_mask,
                                        gran_type=params.gran_type)

            if params.gran_type == 1 or params.gran_type == 2:
                l_out = lasagne_average_layer([l_lstm, l_mask], tosum=False)
            else:
                l_out = lasagne.layers.SliceLayer(l_lstm, -1, 1)

        elif params.model == "wordaverage":
            l_out = lasagne_average_layer([l_emb, l_mask], tosum=False)

        self.final_layer = l_out

        embg1 = lasagne.layers.get_output(
            l_out, {l_in: g1, l_mask: g1mask}, deterministic=False)
        embg2 = lasagne.layers.get_output(
            l_out, {l_in: g2, l_mask: g2mask}, deterministic=False)
        embp1 = lasagne.layers.get_output(
            l_out, {l_in: p1, l_mask: p1mask}, deterministic=False)
        embp2 = lasagne.layers.get_output(
            l_out, {l_in: p2, l_mask: p2mask}, deterministic=False)

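        # Clamp non-positive values to a small epsilon so T.sqrt below never
        # sees zero, which would yield NaN gradients.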
        def fix(x):
            return x * (x > 0) + 1E-10 * (x <= 0)

        g1g2 = (embg1 * embg2).sum(axis=1)
        g1g2norm = T.sqrt(fix(T.sum(embg1**2, axis=1))) * T.sqrt(
            fix(T.sum(embg2**2, axis=1)))
        g1g2 = g1g2 / g1g2norm

        p1g1 = (embp1 * embg1).sum(axis=1)
        p1g1norm = T.sqrt(fix(T.sum(embp1**2, axis=1))) * T.sqrt(
            fix(T.sum(embg1**2, axis=1)))
        p1g1 = p1g1 / p1g1norm

        p2g2 = (embp2 * embg2).sum(axis=1)
        p2g2norm = T.sqrt(fix(T.sum(embp2**2, axis=1))) * T.sqrt(
            fix(T.sum(embg2**2, axis=1)))
        p2g2 = p2g2 / p2g2norm

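        # Margin-based hinge loss: each negative-pair similarity (p1g1, p2g2)
        # is pushed to be at least `params.margin` below the positive
        # similarity g1g2.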
        costp1g1 = params.margin - g1g2 + p1g1
        costp1g1 = costp1g1 * (costp1g1 > 0)

        costp2g2 = params.margin - g1g2 + p2g2
        costp2g2 = costp2g2 * (costp2g2 > 0)

        cost = costp1g1 + costp2g2
        # Drop the embedding matrix (index 0) from the L2 penalty; instead,
        # word_reg below pulls the embeddings toward their initial values.
        network_params = lasagne.layers.get_all_params(l_out, trainable=True)
        network_params.pop(0)

        self.all_params = lasagne.layers.get_all_params(l_out, trainable=True)
        print(self.all_params)

        if params.LC:
            l2 = 0.5 * params.LC * sum(
                lasagne.regularization.l2(x) for x in network_params)
        else:
            l2 = 0
        word_reg = 0.5 * params.LW * lasagne.regularization.l2(We - initial_We)
        cost = T.mean(cost) + l2 + word_reg

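        # For scoring, recompute the cosine similarity without the clamp.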
        g1g2 = (embg1 * embg2).sum(axis=1)
        g1g2norm = T.sqrt(T.sum(embg1**2, axis=1)) * T.sqrt(
            T.sum(embg2**2, axis=1))
        g1g2 = g1g2 / g1g2norm

        self.feedforward_function = theano.function([g1, g1mask], embg1)
        prediction = g1g2
        self.scoring_function = theano.function([g1, g2, g1mask, g2mask],
                                                prediction)

        # Compute gradients and build the optimizer updates.
        grads = theano.gradient.grad(cost, self.all_params)
        updates = params.learner(grads, self.all_params, params.eta)

        self.train_function = theano.function(
            [g1, g2, p1, p2, g1mask, g2mask, p1mask, p2mask],
            cost,
            updates=updates)

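        # Unregularized objective, exposed separately for monitoring.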
        cost = costp1g1 + costp2g2
        cost = T.mean(cost)
        self.cost_function = theano.function(
            [g1, g2, p1, p2, g1mask, g2mask, p1mask, p2mask], cost)

        print "Num Params:", lasagne.layers.count_params(self.final_layer)

class CombinationModel(object):

    def __init__(self,
                 We_initial_ngrams,
                 We_initial_words,
                 params,
                 We_initial_lstm=None):

        if We_initial_ngrams is not None:
            We_ngrams = theano.shared(
                np.asarray(We_initial_ngrams, dtype=config.floatX))
        if We_initial_words is not None:
            We_words = theano.shared(
                np.asarray(We_initial_words, dtype=config.floatX))
        if We_initial_lstm is not None:
            We_lstm = theano.shared(
                np.asarray(We_initial_lstm, dtype=config.floatX))

        ng_g1 = T.imatrix()
        ng_g2 = T.imatrix()
        ng_p1 = T.imatrix()
        ng_p2 = T.imatrix()
        ng_g1mask = T.matrix()
        ng_g2mask = T.matrix()
        ng_p1mask = T.matrix()
        ng_p2mask = T.matrix()

        wd_g1 = T.imatrix()
        wd_g2 = T.imatrix()
        wd_p1 = T.imatrix()
        wd_p2 = T.imatrix()
        wd_g1mask = T.matrix()
        wd_g2mask = T.matrix()
        wd_p1mask = T.matrix()
        wd_p2mask = T.matrix()

        lstm_g1batchindices = T.imatrix()
        lstm_g2batchindices = T.imatrix()
        lstm_p1batchindices = T.imatrix()
        lstm_p2batchindices = T.imatrix()
        lstm_g1mask = T.matrix()
        lstm_g2mask = T.matrix()
        lstm_p1mask = T.matrix()
        lstm_p2mask = T.matrix()

        ng_inputs = [
            ng_g1, ng_g2, ng_p1, ng_p2, ng_g1mask, ng_g2mask, ng_p1mask,
            ng_p2mask
        ]

        wd_inputs = [
            wd_g1, wd_g2, wd_p1, wd_p2, wd_g1mask, wd_g2mask, wd_p1mask,
            wd_p2mask
        ]

        lstm_inputs = [
            lstm_g1batchindices, lstm_g2batchindices, lstm_p1batchindices,
            lstm_p2batchindices, lstm_g1mask, lstm_g2mask, lstm_p1mask,
            lstm_p2mask
        ]

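        # Build one encoder per representation named in
        # params.combination_type (e.g. "ngram-word-lstm"); each encoder gets
        # its own input and mask layers.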
        if "ngram" in params.combination_type:
            l_in_ngrams = lasagne.layers.InputLayer((None, None))
            l_mask_ngrams = lasagne.layers.InputLayer(shape=(None, None))
            l_emb_ngrams = lasagne.layers.EmbeddingLayer(
                l_in_ngrams,
                input_size=We_ngrams.get_value().shape[0],
                output_size=We_ngrams.get_value().shape[1],
                W=We_ngrams)

            l_out_ngrams = lasagne_average_layer([l_emb_ngrams, l_mask_ngrams],
                                                 tosum=False)

        if "word" in params.combination_type:
            l_in_words = lasagne.layers.InputLayer((None, None))
            l_mask_words = lasagne.layers.InputLayer(shape=(None, None))
            l_emb_words = lasagne.layers.EmbeddingLayer(
                l_in_words,
                input_size=We_words.get_value().shape[0],
                output_size=We_words.get_value().shape[1],
                W=We_words)
            l_out_wd = lasagne_average_layer([l_emb_words, l_mask_words],
                                             tosum=False)

        if "lstm" in params.combination_type:
            l_in_lstm = lasagne.layers.InputLayer((None, None))
            l_mask_lstm = lasagne.layers.InputLayer(shape=(None, None))
            l_emb_lstm = lasagne.layers.EmbeddingLayer(
                l_in_lstm,
                input_size=We_lstm.get_value().shape[0],
                output_size=We_lstm.get_value().shape[1],
                W=We_lstm)
            l_lstm = lasagne.layers.LSTMLayer(l_emb_lstm,
                                              params.dim,
                                              peepholes=True,
                                              learn_init=False,
                                              mask_input=l_mask_lstm)
            l_out_lstm = lasagne_average_layer([l_lstm, l_mask_lstm],
                                               tosum=False)

        lis = []
        if "ngram" in params.combination_type:
            lis.append(l_out_ngrams)
        if "word" in params.combination_type:
            lis.append(l_out_wd)
        if "lstm" in params.combination_type:
            lis.append(l_out_lstm)

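        # Merge the per-encoder sentence embeddings, either by elementwise
        # addition or by concatenation along the feature axis.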
        if params.combination_method == "add":
            l_out = lasagne_add_layer(lis)
        elif params.combination_method == "concat":
            l_out = lasagne.layers.ConcatLayer(lis)

        self.final_layer = l_out

        if params.combination_type == "ngram-word":
            embg1 = lasagne.layers.get_output(
                l_out, {
                    l_in_ngrams: ng_g1,
                    l_mask_ngrams: ng_g1mask,
                    l_in_words: wd_g1,
                    l_mask_words: wd_g1mask
                })
            embg2 = lasagne.layers.get_output(
                l_out, {
                    l_in_ngrams: ng_g2,
                    l_mask_ngrams: ng_g2mask,
                    l_in_words: wd_g2,
                    l_mask_words: wd_g2mask
                })
            embp1 = lasagne.layers.get_output(
                l_out, {
                    l_in_ngrams: ng_p1,
                    l_mask_ngrams: ng_p1mask,
                    l_in_words: wd_p1,
                    l_mask_words: wd_p1mask
                })
            embp2 = lasagne.layers.get_output(
                l_out, {
                    l_in_ngrams: ng_p2,
                    l_mask_ngrams: ng_p2mask,
                    l_in_words: wd_p2,
                    l_mask_words: wd_p2mask
                })
        elif params.combination_type == "ngram-lstm":
            embg1 = lasagne.layers.get_output(
                l_out, {
                    l_in_ngrams: ng_g1,
                    l_mask_ngrams: ng_g1mask,
                    l_in_lstm: lstm_g1batchindices,
                    l_mask_lstm: lstm_g1mask
                })
            embg2 = lasagne.layers.get_output(
                l_out, {
                    l_in_ngrams: ng_g2,
                    l_mask_ngrams: ng_g2mask,
                    l_in_lstm: lstm_g2batchindices,
                    l_mask_lstm: lstm_g2mask
                })
            embp1 = lasagne.layers.get_output(
                l_out, {
                    l_in_ngrams: ng_p1,
                    l_mask_ngrams: ng_p1mask,
                    l_in_lstm: lstm_p1batchindices,
                    l_mask_lstm: lstm_p1mask
                })
            embp2 = lasagne.layers.get_output(
                l_out, {
                    l_in_ngrams: ng_p2,
                    l_mask_ngrams: ng_p2mask,
                    l_in_lstm: lstm_p2batchindices,
                    l_mask_lstm: lstm_p2mask
                })
        elif params.combination_type == "word-lstm":
            embg1 = lasagne.layers.get_output(
                l_out, {
                    l_in_words: wd_g1,
                    l_mask_words: wd_g1mask,
                    l_in_lstm: lstm_g1batchindices,
                    l_mask_lstm: lstm_g1mask
                })
            embg2 = lasagne.layers.get_output(
                l_out, {
                    l_in_words: wd_g2,
                    l_mask_words: wd_g2mask,
                    l_in_lstm: lstm_g2batchindices,
                    l_mask_lstm: lstm_g2mask
                })
            embp1 = lasagne.layers.get_output(
                l_out, {
                    l_in_words: wd_p1,
                    l_mask_words: wd_p1mask,
                    l_in_lstm: lstm_p1batchindices,
                    l_mask_lstm: lstm_p1mask
                })
            embp2 = lasagne.layers.get_output(
                l_out, {
                    l_in_words: wd_p2,
                    l_mask_words: wd_p2mask,
                    l_in_lstm: lstm_p2batchindices,
                    l_mask_lstm: lstm_p2mask
                })
        elif params.combination_type == "ngram-word-lstm":
            embg1 = lasagne.layers.get_output(
                l_out, {
                    l_in_ngrams: ng_g1,
                    l_mask_ngrams: ng_g1mask,
                    l_in_words: wd_g1,
                    l_mask_words: wd_g1mask,
                    l_in_lstm: lstm_g1batchindices,
                    l_mask_lstm: lstm_g1mask
                })
            embg2 = lasagne.layers.get_output(
                l_out, {
                    l_in_ngrams: ng_g2,
                    l_mask_ngrams: ng_g2mask,
                    l_in_words: wd_g2,
                    l_mask_words: wd_g2mask,
                    l_in_lstm: lstm_g2batchindices,
                    l_mask_lstm: lstm_g2mask
                })
            embp1 = lasagne.layers.get_output(
                l_out, {
                    l_in_ngrams: ng_p1,
                    l_mask_ngrams: ng_p1mask,
                    l_in_words: wd_p1,
                    l_mask_words: wd_p1mask,
                    l_in_lstm: lstm_p1batchindices,
                    l_mask_lstm: lstm_p1mask
                })
            embp2 = lasagne.layers.get_output(
                l_out, {
                    l_in_ngrams: ng_p2,
                    l_mask_ngrams: ng_p2mask,
                    l_in_words: wd_p2,
                    l_mask_words: wd_p2mask,
                    l_in_lstm: lstm_p2batchindices,
                    l_mask_lstm: lstm_p2mask
                })

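        # Same numerical-stability clamp and margin objective as in the
        # previous model.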
        def fix(x):
            return x * (x > 0) + 1E-10 * (x <= 0)

        g1g2 = (embg1 * embg2).sum(axis=1)
        g1g2norm = T.sqrt(fix(T.sum(embg1**2, axis=1))) * T.sqrt(
            fix(T.sum(embg2**2, axis=1)))
        g1g2 = g1g2 / g1g2norm

        p1g1 = (embp1 * embg1).sum(axis=1)
        p1g1norm = T.sqrt(fix(T.sum(embp1**2, axis=1))) * T.sqrt(
            fix(T.sum(embg1**2, axis=1)))
        p1g1 = p1g1 / p1g1norm

        p2g2 = (embp2 * embg2).sum(axis=1)
        p2g2norm = T.sqrt(fix(T.sum(embp2**2, axis=1))) * T.sqrt(
            fix(T.sum(embg2**2, axis=1)))
        p2g2 = p2g2 / p2g2norm

        costp1g1 = params.margin - g1g2 + p1g1
        costp1g1 = costp1g1 * (costp1g1 > 0)

        costp2g2 = params.margin - g1g2 + p2g2
        costp2g2 = costp2g2 * (costp2g2 > 0)

        cost = costp1g1 + costp2g2
        # Collected for parity with the other models, but unused here: this
        # model applies no L2 penalty to its parameters.
        network_params = lasagne.layers.get_all_params(l_out, trainable=True)
        network_params.pop(0)
        self.all_params = lasagne.layers.get_all_params(l_out, trainable=True)
        print(self.all_params)

        cost = T.mean(cost)

        g1g2 = (embg1 * embg2).sum(axis=1)
        g1g2norm = T.sqrt(T.sum(embg1**2, axis=1)) * T.sqrt(
            T.sum(embg2**2, axis=1))
        g1g2 = g1g2 / g1g2norm
        prediction = g1g2

        if params.combination_type == "ngram-word":
            self.feedforward_function = theano.function(
                [ng_g1, ng_g1mask, wd_g1, wd_g1mask], embg1)
            self.scoring_function = theano.function([
                ng_g1, ng_g1mask, wd_g1, wd_g1mask, ng_g2, ng_g2mask, wd_g2,
                wd_g2mask
            ], prediction)
        elif params.combination_type == "ngram-lstm":
            self.feedforward_function = theano.function(
                [ng_g1, ng_g1mask, lstm_g1batchindices, lstm_g1mask], embg1)
            self.scoring_function = theano.function([
                ng_g1, ng_g1mask, lstm_g1batchindices, lstm_g1mask, ng_g2,
                ng_g2mask, lstm_g2batchindices, lstm_g2mask
            ], prediction)
        elif params.combination_type == "word-lstm":
            self.feedforward_function = theano.function(
                [wd_g1, wd_g1mask, lstm_g1batchindices, lstm_g1mask], embg1)
            self.scoring_function = theano.function([
                wd_g1, wd_g1mask, lstm_g1batchindices, lstm_g1mask, wd_g2,
                wd_g2mask, lstm_g2batchindices, lstm_g2mask
            ], prediction)
        elif params.combination_type == "ngram-word-lstm":
            self.feedforward_function = theano.function([
                ng_g1, ng_g1mask, wd_g1, wd_g1mask, lstm_g1batchindices,
                lstm_g1mask
            ], embg1)
            self.scoring_function = theano.function([
                ng_g1, ng_g1mask, wd_g1, wd_g1mask, lstm_g1batchindices,
                lstm_g1mask, ng_g2, ng_g2mask, wd_g2, wd_g2mask,
                lstm_g2batchindices, lstm_g2mask
            ], prediction)

        grads = theano.gradient.grad(cost, self.all_params)
        updates = params.learner(grads, self.all_params, params.eta)

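        # Recomputed for the cost function; identical to the training cost
        # here since this model adds no regularization term.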
        cost = costp1g1 + costp2g2
        cost = T.mean(cost)

        if params.combination_type == "ngram-word":
            self.train_function = theano.function(ng_inputs + wd_inputs,
                                                  cost,
                                                  updates=updates)
            self.cost_function = theano.function(ng_inputs + wd_inputs, cost)
        elif params.combination_type == "ngram-lstm":
            self.train_function = theano.function(ng_inputs + lstm_inputs,
                                                  cost,
                                                  updates=updates)
            self.cost_function = theano.function(ng_inputs + lstm_inputs, cost)
        elif params.combination_type == "word-lstm":
            self.train_function = theano.function(wd_inputs + lstm_inputs,
                                                  cost,
                                                  updates=updates)
            self.cost_function = theano.function(wd_inputs + lstm_inputs, cost)
        elif params.combination_type == "ngram-word-lstm":
            self.train_function = theano.function(ng_inputs + wd_inputs +
                                                  lstm_inputs,
                                                  cost,
                                                  updates=updates)
            self.cost_function = theano.function(
                ng_inputs + wd_inputs + lstm_inputs, cost)

        print("Num Params:", lasagne.layers.count_params(self.final_layer))