Example #1
    def __init__(self, We_initial, params):
        initial_We = theano.shared(np.asarray(We_initial, dtype = config.floatX))
        We = theano.shared(np.asarray(We_initial, dtype = config.floatX))
        if params.npc > 0:
            pc = theano.shared(np.asarray(params.pc, dtype = config.floatX))

        g1batchindices = T.imatrix()
        g1mask = T.matrix()
        scores = T.matrix()

        l_in = lasagne.layers.InputLayer((None, None))
        l_mask = lasagne.layers.InputLayer(shape=(None, None))
        l_emb = lasagne.layers.EmbeddingLayer(l_in, input_size=We.get_value().shape[0], output_size=We.get_value().shape[1], W=We)
        l_average = lasagne_average_layer([l_emb, l_mask])
        l_out = lasagne.layers.DenseLayer(l_average, params.layersize, nonlinearity=params.nonlinearity)
        embg = lasagne.layers.get_output(l_out, {l_in:g1batchindices, l_mask:g1mask})
        if params.npc <= 0:
            print("#pc <=0, do not remove pc")
        elif params.npc == 1:
            print("#pc == 1")
            proj =  embg.dot(pc.transpose())
            embg = embg - theano.tensor.outer(proj, pc)
        else:
            print("#pc > 1")
            proj =  embg.dot(pc.transpose())
            embg = embg - theano.tensor.dot(proj, pc)

        l_in2 = lasagne.layers.InputLayer((None, params.layersize))
        l_sigmoid = lasagne.layers.DenseLayer(l_in2, params.memsize, nonlinearity=lasagne.nonlinearities.sigmoid)

        l_softmax = lasagne.layers.DenseLayer(l_sigmoid, 2, nonlinearity=T.nnet.softmax)
        X = lasagne.layers.get_output(l_softmax, {l_in2:embg})
        cost = T.nnet.categorical_crossentropy(X,scores)
        prediction = T.argmax(X, axis=1)

        self.network_params = lasagne.layers.get_all_params(l_out, trainable=True) + lasagne.layers.get_all_params(l_softmax, trainable=True)
        self.network_params.pop(0) # do not include the word embedding as network parameters
        self.all_params = lasagne.layers.get_all_params(l_out, trainable=True) + lasagne.layers.get_all_params(l_softmax, trainable=True)

        reg = self.getRegTerm(params, We, initial_We)
        self.trainable = self.getTrainableParams(params)
        cost = T.mean(cost) + reg

        self.feedforward_function = theano.function([g1batchindices,g1mask], embg)
        self.scoring_function = theano.function([g1batchindices, g1mask],prediction)
        self.cost_function = theano.function([scores, g1batchindices, g1mask], cost)

        grads = theano.gradient.grad(cost, self.trainable)
        if params.clip:
            grads = [lasagne.updates.norm_constraint(grad, params.clip, list(range(grad.ndim))) for grad in grads]
        updates = params.learner(grads, self.trainable, params.eta)
        self.train_function = theano.function([scores, g1batchindices, g1mask], cost, updates=updates)
Example #2
    def __init__(self, We_initial, params):

        initial_We = theano.shared(np.asarray(We_initial, dtype = config.floatX))
        We = theano.shared(np.asarray(We_initial, dtype = config.floatX))

        g1batchindices = T.imatrix(); g2batchindices = T.imatrix()
        p1batchindices = T.imatrix(); p2batchindices = T.imatrix()
        g1mask = T.matrix(); g2mask = T.matrix()
        p1mask = T.matrix(); p2mask = T.matrix()

        l_in = lasagne.layers.InputLayer((None, None))
        l_mask = lasagne.layers.InputLayer(shape=(None, None))
        l_emb = lasagne.layers.EmbeddingLayer(l_in, input_size=We.get_value().shape[0], output_size=We.get_value().shape[1], W=We)
        l_average = lasagne_average_layer([l_emb, l_mask])
        l_1 = lasagne.layers.DenseLayer(l_average, params.hiddensize, W=lasagne.init.Normal(0.1), b=lasagne.init.Constant(0.), nonlinearity=params.nonlinearity)
        l_2 = lasagne.layers.DenseLayer(l_1, params.hiddensize, nonlinearity=params.nonlinearity)
        l_3 = lasagne.layers.DenseLayer(l_2, params.hiddensize, nonlinearity=params.nonlinearity)
        l_4 = lasagne.layers.DenseLayer(l_3, params.hiddensize, nonlinearity=params.nonlinearity)

        l_end = None
        if params.numlayers == 1:
            l_end = l_1
        elif params.numlayers == 2:
            l_end = l_2
        elif params.numlayers == 3:
            l_end = l_3
        elif params.numlayers == 4:
            l_end = l_4
        else:
            raise ValueError('Only 1-4 layers are supported currently.')

        embg1 = lasagne.layers.get_output(l_end, {l_in:g1batchindices, l_mask:g1mask})
        embg2 = lasagne.layers.get_output(l_end, {l_in:g2batchindices, l_mask:g2mask})
        embp1 = lasagne.layers.get_output(l_end, {l_in:p1batchindices, l_mask:p1mask})
        embp2 = lasagne.layers.get_output(l_end, {l_in:p2batchindices, l_mask:p2mask})

        g1g2 = (embg1*embg2).sum(axis=1)
        g1g2norm = T.sqrt(T.sum(embg1**2,axis=1)) * T.sqrt(T.sum(embg2**2,axis=1))
        g1g2 = g1g2 / g1g2norm

        p1g1 = (embp1*embg1).sum(axis=1)
        p1g1norm = T.sqrt(T.sum(embp1**2,axis=1)) * T.sqrt(T.sum(embg1**2,axis=1))
        p1g1 = p1g1 / p1g1norm

        p2g2 = (embp2*embg2).sum(axis=1)
        p2g2norm = T.sqrt(T.sum(embp2**2,axis=1)) * T.sqrt(T.sum(embg2**2,axis=1))
        p2g2 = p2g2 / p2g2norm

        costp1g1 = params.margin - g1g2 + p1g1
        costp1g1 = costp1g1*(costp1g1 > 0)

        costp2g2 = params.margin - g1g2 + p2g2
        costp2g2 = costp2g2*(costp2g2 > 0)

        cost = costp1g1 + costp2g2
        network_params = lasagne.layers.get_all_params(l_end, trainable=True)
        network_params.pop(0)
        self.all_params = lasagne.layers.get_all_params(l_end, trainable=True)

        #regularization
        l2 = 0.5*params.LC*sum(lasagne.regularization.l2(x) for x in network_params)
        if params.updatewords:
            word_reg = 0.5*params.LW*lasagne.regularization.l2(We-initial_We)
            cost = T.mean(cost) + l2 + word_reg
        else:
            cost = T.mean(cost) + l2

        self.feedforward_function = theano.function([g1batchindices,g1mask], embg1)
        self.cost_function = theano.function([g1batchindices, g2batchindices, p1batchindices, p2batchindices,
                             g1mask, g2mask, p1mask, p2mask], cost)

        prediction = g1g2

        self.scoring_function = theano.function([g1batchindices, g2batchindices,
                             g1mask, g2mask],prediction)

        self.train_function = None
        if params.updatewords:
            grads = theano.gradient.grad(cost, self.all_params)
            if params.clip:
                grads = [lasagne.updates.norm_constraint(grad, params.clip, range(grad.ndim)) for grad in grads]
            updates = params.learner(grads, self.all_params, params.eta)
            self.train_function = theano.function([g1batchindices, g2batchindices, p1batchindices, p2batchindices,
                             g1mask, g2mask, p1mask, p2mask], cost, updates=updates)
        else:
            self.all_params = network_params
            grads = theano.gradient.grad(cost, self.all_params)
            if params.clip:
                grads = [lasagne.updates.norm_constraint(grad, params.clip, range(grad.ndim)) for grad in grads]
            updates = params.learner(grads, self.all_params, params.eta)
            self.train_function = theano.function([g1batchindices, g2batchindices, p1batchindices, p2batchindices,
                             g1mask, g2mask, p1mask, p2mask], cost, updates=updates)
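The objective here is a max-margin loss that pushes the cosine similarity of a gold paraphrase pair above its similarity to negative examples. A NumPy sketch of the same per-batch cost, using an illustrative margin of 0.4 (the real value comes from params.margin):

import numpy as np

def cosine(a, b):
    # row-wise cosine similarity between two batches of embeddings
    return (a * b).sum(axis=1) / (np.linalg.norm(a, axis=1) * np.linalg.norm(b, axis=1))

def hinge_cost(embg1, embg2, embp1, embp2, margin=0.4):
    g1g2 = cosine(embg1, embg2)                      # similarity of the gold pair
    p1g1 = cosine(embp1, embg1)                      # similarity of g1 to its negative example
    p2g2 = cosine(embp2, embg2)                      # similarity of g2 to its negative example
    costp1g1 = np.maximum(0.0, margin - g1g2 + p1g1)
    costp2g2 = np.maximum(0.0, margin - g1g2 + p2g2)
    return (costp1g1 + costp2g2).mean()

embs = [np.random.randn(8, 300) for _ in range(4)]
print(hinge_cost(*embs))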
Example #3
    def __init__(self, We_initial, params):

        params.siamese = True
        ## Params
        initial_We = theano.shared(np.asarray(We_initial, dtype = config.floatX))
        We = theano.shared(np.asarray(We_initial, dtype = config.floatX))

        ## Symbolic Params
        # Input variables for a batch of sentences whose target
        # synonyms and antonyms appear in the next tensor
        senBatch_indices = T.imatrix(); senMask = T.matrix()

        # Input variable for a batch of positive and negative
        # examples (so syn, neg1, neg2, ...)
        targetBatch_indices = T.itensor3(); targetMask = T.tensor3()
        
        targets = T.matrix()
        

        ## First embedding input layer
        l_in_1   = lasagne.layers.InputLayer((None, None, 1))
        l_mask_1 = lasagne.layers.InputLayer(shape=(None, None))
        # First embedding layer and Knowledge Distillation's embedding Layer
        l_emb_1  = lasagne.layers.EmbeddingLayer(l_in_1, input_size=We.get_value().shape[0], output_size=We.get_value().shape[1], W=We)
        l_emb_1_reg  = lasagne.layers.EmbeddingLayer(l_in_1, input_size=initial_We.get_value().shape[0], output_size=initial_We.get_value().shape[1], W=initial_We)
        l_emb_1_reg.params[l_emb_1_reg.W].remove('trainable')
        # First Average Layer and Knowledge Distillation's First Average Layer
        #l_emb_1_drop = lasagne.layers.DropoutLayer(l_emb_1, p=0.8)
        l_average_1 = lasagne_average_layer([l_emb_1, l_mask_1])
        l_average_1_reg = lasagne_average_layer([l_emb_1_reg, l_mask_1])


        in2embgs = lasagne.layers.get_output(l_emb_1, {l_in_1:senBatch_indices}, deterministic=True)
        embg1 = lasagne.layers.get_output(l_average_1, {l_in_1:senBatch_indices, l_mask_1:senMask}, deterministic=True)



        ## Second embedding input layer
        l_in_2   = lasagne.layers.InputLayer(shape=(None, None, None, 1))
        l_mask_2 = lasagne.layers.InputLayer(shape=(None, None, None))
        # Second embedding layer, with weights tied to the first embedding layer
        l_emb_2  = lasagne.layers.EmbeddingLayer(l_in_2, input_size=We.get_value().shape[0], output_size=We.get_value().shape[1], W=l_emb_1.W)
        l_emb_2_reg  = lasagne.layers.EmbeddingLayer(l_in_2, input_size=initial_We.get_value().shape[0], output_size=initial_We.get_value().shape[1], W=l_emb_1_reg.W)
        l_emb_2_reg.params[l_emb_2_reg.W].remove('trainable')
        


        # Second Average Layer
        #l_emb_2 = lasagne.layers.DropoutLayer(l_emb_2, p=0.8)
        l_average_2 = averageLayer_matrix([l_emb_2, l_mask_2])
        l_transpose_2 = lasagne.layers.DimshuffleLayer(l_average_2, (0,2,1))
        # Knowledge Distillation's Second Average Layer
        l_average_2_reg = averageLayer_matrix([l_emb_2_reg, l_mask_2])
        l_transpose_2_reg = lasagne.layers.DimshuffleLayer(l_average_2_reg, (0,2,1))

        
        ## Layer Combination
        l_cosine = cosineLayer([l_average_1, l_transpose_2], We.get_value().shape[1])
        g1g2 = lasagne.layers.get_output(l_cosine, {l_in_1:senBatch_indices, \
                    l_mask_1:senMask, l_in_2:targetBatch_indices, l_mask_2:targetMask}, deterministic=True)
        g1g2 = g1g2[:, 0]
        # Knowledge Distillation's Layer Combination
        l_cosine_reg = cosineLayer([l_average_1_reg, l_transpose_2_reg], We.get_value().shape[1])
        

        l_final_layer = softMaxLayer(l_cosine)
        # Knowledge Distillation's Layer Combination
        l_final_layer_reg = softMaxLayer(l_cosine_reg)
        
        ## Objective Function
        prediction = lasagne.layers.get_output(l_final_layer, {l_in_1:senBatch_indices, \
                    l_mask_1:senMask, l_in_2:targetBatch_indices, l_mask_2:targetMask})
        # Knowledge Distillation's Prediction
        prediction_reg = lasagne.layers.get_output(l_final_layer_reg, {l_in_1:senBatch_indices, \
                    l_mask_1:senMask, l_in_2:targetBatch_indices, l_mask_2:targetMask})


        self.all_params = lasagne.layers.get_all_params(l_final_layer, trainable=True)

        loss = lasagne.objectives.categorical_crossentropy(prediction, targets)
        # Knowledge Distillation's Loss
        loss_reg = lasagne.objectives.categorical_crossentropy(prediction, prediction_reg)
        
        
        cost = params.LW*loss_reg.mean() + params.hyper_k1*loss.mean() 

        #feedforward
        self.feedforward_function = theano.function([senBatch_indices,senMask], embg1)
        self.cost_function = theano.function([senBatch_indices, senMask, targetBatch_indices, 
                                              targetMask, targets], cost)
        self.cost_distillation = theano.function([senBatch_indices, senMask, targetBatch_indices, 
                                              targetMask], loss_reg.mean())
       
        self.scoring_function = theano.function([senBatch_indices, senMask, 
                                    targetBatch_indices, targetMask], g1g2)


        self.word2embeddings = theano.function([senBatch_indices], in2embgs)


        #updates
        grads = theano.gradient.grad(cost, self.all_params)
        if params.clip:
            grads = [lasagne.updates.norm_constraint(grad, params.clip, range(grad.ndim)) for grad in grads]
        updates = params.learner(grads, self.all_params, params.eta)
        self.train_function = theano.function([senBatch_indices, senMask, 
                                    targetBatch_indices, targetMask, targets], cost, updates=updates)
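The loss_reg term above is the distillation part of the objective: a cross-entropy between the trainable model's prediction and the prediction of a frozen copy built on the initial embeddings. A minimal NumPy sketch of that soft-target cross-entropy (illustrative only; the real loss is computed symbolically via lasagne.objectives.categorical_crossentropy):

import numpy as np

def soft_target_xent(prediction, prediction_reg, eps=1e-7):
    """Cross-entropy of the trainable model's distribution against the frozen model's soft targets."""
    prediction = np.clip(prediction, eps, 1.0)
    return -(prediction_reg * np.log(prediction)).sum(axis=1)

prediction = np.array([[0.7, 0.2, 0.1]])        # trainable (student) model
prediction_reg = np.array([[0.6, 0.3, 0.1]])    # frozen copy on the initial embeddings (teacher)
print(soft_target_xent(prediction, prediction_reg))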
Example #4
    def __init__(self, We_initial, params):

        print "WordModel"

        initial_We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
        We = theano.shared(np.asarray(We_initial, dtype=config.floatX))

        g1batchindices = T.imatrix()
        g2batchindices = T.imatrix()
        g1mask = T.matrix()
        g2mask = T.matrix()
        scores = T.matrix()

        l_in = lasagne.layers.InputLayer((None, None))
        l_mask = lasagne.layers.InputLayer(shape=(None, None))
        l_emb = lasagne.layers.EmbeddingLayer(
            l_in,
            input_size=We.get_value().shape[0],
            output_size=We.get_value().shape[1],
            W=We)
        l_out = lasagne_average_layer([l_emb, l_mask])

        embg1 = lasagne.layers.get_output(l_out, {
            l_in: g1batchindices,
            l_mask: g1mask
        })
        embg2 = lasagne.layers.get_output(l_out, {
            l_in: g2batchindices,
            l_mask: g2mask
        })

        g1_dot_g2 = embg1 * embg2
        g1_abs_g2 = abs(embg1 - embg2)

        lin_dot = lasagne.layers.InputLayer((None, We.get_value().shape[1]))
        lin_abs = lasagne.layers.InputLayer((None, We.get_value().shape[1]))
        l_sum = lasagne.layers.ConcatLayer([lin_dot, lin_abs])
        l_sigmoid = lasagne.layers.DenseLayer(
            l_sum, params.memsize, nonlinearity=lasagne.nonlinearities.sigmoid)

        l_softmax = lasagne.layers.DenseLayer(l_sigmoid,
                                              params.nout,
                                              nonlinearity=T.nnet.softmax)
        X = lasagne.layers.get_output(l_softmax, {
            lin_dot: g1_dot_g2,
            lin_abs: g1_abs_g2
        })
        Y = T.log(X)

        cost = scores * (T.log(scores) - Y)
        cost = cost.sum(axis=1) / (float(params.nout))

        prediction = 0.
        i = params.minval
        while i <= params.maxval:
            prediction = prediction + i * X[:, i - 1]
            i += 1


        self.network_params = lasagne.layers.get_all_params(l_out, trainable=True) + \
                              lasagne.layers.get_all_params(l_softmax, trainable=True)
        self.network_params.pop(0)

        self.all_params = lasagne.layers.get_all_params(l_out, trainable=True) + \
                          lasagne.layers.get_all_params(l_softmax, trainable=True)

        reg = self.getRegTerm(params, We, initial_We)
        self.trainable = self.getTrainableParams(params)
        cost = T.mean(cost) + reg

        self.feedforward_function = theano.function([g1batchindices, g1mask],
                                                    embg1)
        self.scoring_function = theano.function(
            [g1batchindices, g2batchindices, g1mask, g2mask], prediction)
        self.cost_function = theano.function(
            [scores, g1batchindices, g2batchindices, g1mask, g2mask], cost)

        grads = theano.gradient.grad(cost, self.trainable)
        if params.clip:
            grads = [
                lasagne.updates.norm_constraint(grad, params.clip,
                                                range(grad.ndim))
                for grad in grads
            ]
        updates = params.learner(grads, self.trainable, params.eta)
        self.train_function = theano.function(
            [scores, g1batchindices, g2batchindices, g1mask, g2mask],
            cost,
            updates=updates)
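The cost here is the KL divergence between the gold score distribution scores and the predicted distribution X, and the scalar prediction is the expectation over the score bins. A NumPy sketch of both quantities, assuming a 1-5 scale (i.e. params.minval = 1, params.maxval = 5; the actual bounds come from params):

import numpy as np

def kl_cost(scores, X, nout):
    # per-pair KL divergence between the gold distribution and the prediction,
    # averaged over the nout score bins as in the graph above
    return (scores * (np.log(scores) - np.log(X))).sum(axis=1) / float(nout)

def expected_score(X, minval=1, maxval=5):
    # scalar prediction as the expectation over the score bins
    return X.dot(np.arange(minval, maxval + 1))

X = np.full((2, 5), 0.2)                                 # uniform prediction over 5 bins
scores = np.array([[0.0, 0.0, 0.5, 0.5, 0.0],
                   [0.1, 0.2, 0.4, 0.2, 0.1]])
print(kl_cost(np.clip(scores, 1e-7, 1.0), X, nout=5))
print(expected_score(X))                                 # [3. 3.]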
Example #5
    def __init__(self, We_initial, params):

        print "WordL1Model"

        initial_We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
        We = theano.shared(np.asarray(We_initial, dtype=config.floatX))

        g1batchindices = T.imatrix()
        g2batchindices = T.imatrix()
        g1mask = T.matrix()
        g2mask = T.matrix()
        scores = T.matrix()

        l_in = lasagne.layers.InputLayer((None, None))
        l_mask = lasagne.layers.InputLayer(shape=(None, None))
        l_emb = lasagne.layers.EmbeddingLayer(
            l_in,
            input_size=We.get_value().shape[0],
            output_size=We.get_value().shape[1],
            W=We)
        l_out = lasagne_average_layer([l_emb, l_mask])

        embg1 = lasagne.layers.get_output(l_out, {
            l_in: g1batchindices,
            l_mask: g1mask
        })
        embg2 = lasagne.layers.get_output(l_out, {
            l_in: g2batchindices,
            l_mask: g2mask
        })

        def L2_norm(vec):
            # use a symbolic sqrt since vec is a Theano expression, not a NumPy array
            return vec / T.sqrt((vec**2).sum() + 1e-4)

        embg1 = L2_norm(embg1)
        embg2 = L2_norm(embg2)

        gold = 0.
        i = params.minval
        while i <= params.maxval:
            gold = gold + i * scores[:, i - 1]
            i += 1

        dif = (embg1 - embg2).norm(L=1, axis=1)
        sim = T.exp(-dif)
        sim = T.clip(sim, 1e-7, 1 - 1e-7)
        gold = T.clip(gold / 5.0, 1e-7, 1 - 1e-7)

        self.network_params = lasagne.layers.get_all_params(l_out,
                                                            trainable=True)
        self.network_params.pop(0)

        self.all_params = lasagne.layers.get_all_params(l_out, trainable=True)

        reg = self.getRegTerm(params, We, initial_We)
        self.trainable = self.getTrainableParams(params)
        cost = T.mean((sim - gold)**2) + reg

        self.feedforward_function = theano.function([g1batchindices, g1mask],
                                                    embg1)
        self.scoring_function = theano.function(
            [g1batchindices, g2batchindices, g1mask, g2mask], sim)
        self.cost_function = theano.function(
            [scores, g1batchindices, g2batchindices, g1mask, g2mask], cost)

        grads = theano.gradient.grad(cost, self.trainable)
        if params.clip:
            grads = [
                lasagne.updates.norm_constraint(grad, params.clip,
                                                range(grad.ndim))
                for grad in grads
            ]
        updates = params.learner(grads, self.trainable, params.eta)
        self.train_function = theano.function(
            [scores, g1batchindices, g2batchindices, g1mask, g2mask],
            cost,
            updates=updates)
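This variant scores a pair by exp of the negative L1 distance between the (roughly normalized) embeddings, clipped into (0, 1), and fits it with a squared error to the gold score rescaled by 1/5. A NumPy sketch of the scoring and the cost (the 1-5 gold scale is an assumption based on the division by 5.0 above):

import numpy as np

def l1_similarity(embg1, embg2):
    # pairwise L1 distance turned into a similarity in (0, 1]
    dif = np.abs(embg1 - embg2).sum(axis=1)
    return np.clip(np.exp(-dif), 1e-7, 1 - 1e-7)

embg1, embg2 = np.random.randn(4, 300), np.random.randn(4, 300)
gold = np.array([1.0, 2.5, 4.0, 5.0])                    # assumed 1-5 gold scores
sim = l1_similarity(embg1, embg2)
cost = np.mean((sim - np.clip(gold / 5.0, 1e-7, 1 - 1e-7)) ** 2)
print(sim, cost)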
Example #6
    def __init__(self, We_initial, params):

        p = None

        if params.traintype == "reg" or params.traintype == "rep":
            p = cPickle.load(open(params.regfile, 'rb'))
            print(p)  # contains We, W, and b

        if params.traintype == "reg":
            print "regularizing to parameters"

        if params.traintype == "rep":
            print "not updating embeddings"

        initial_We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
        We = theano.shared(np.asarray(We_initial, dtype=config.floatX))

        if params.traintype == "reg":
            initial_We = theano.shared(
                np.asarray(p[0].get_value(), dtype=config.floatX))
            We = theano.shared(
                np.asarray(p[0].get_value(), dtype=config.floatX))
            updatewords = True

        if params.traintype == "rep":
            We = theano.shared(
                np.asarray(p[0].get_value(), dtype=config.floatX))
            updatewords = False

        g1batchindices = T.imatrix()
        g1mask = T.matrix()
        scores = T.matrix()

        if params.traintype == "reg" or params.traintype == "rep":
            W = np.asarray(p[1].get_value(), dtype=config.floatX)
            b = np.asarray(p[2].get_value(), dtype=config.floatX)

        l_in = lasagne.layers.InputLayer((None, None))
        l_mask = lasagne.layers.InputLayer(shape=(None, None))
        l_emb = lasagne.layers.EmbeddingLayer(
            l_in,
            input_size=We.get_value().shape[0],
            output_size=We.get_value().shape[1],
            W=We)
        l_average = lasagne_average_layer([l_emb, l_mask])
        l_out = lasagne.layers.DenseLayer(l_average,
                                          params.layersize,
                                          nonlinearity=params.nonlinearity)

        if params.traintype == "reg" or params.traintype == "rep":
            l_in = lasagne.layers.InputLayer((None, None))
            l_mask = lasagne.layers.InputLayer(shape=(None, None))
            l_emb = lasagne.layers.EmbeddingLayer(
                l_in,
                input_size=We.get_value().shape[0],
                output_size=We.get_value().shape[1],
                W=We)
            l_average = lasagne_average_layer([l_emb, l_mask])
            l_out = lasagne.layers.DenseLayer(l_average,
                                              params.layersize,
                                              nonlinearity=params.nonlinearity,
                                              W=W,
                                              b=b)

        embg = lasagne.layers.get_output(l_out, {
            l_in: g1batchindices,
            l_mask: g1mask
        })

        l_in2 = lasagne.layers.InputLayer((None, We.get_value().shape[1]))
        l_sigmoid = lasagne.layers.DenseLayer(
            l_in2, params.memsize, nonlinearity=lasagne.nonlinearities.sigmoid)

        l_softmax = lasagne.layers.DenseLayer(l_sigmoid,
                                              2,
                                              nonlinearity=T.nnet.softmax)
        X = lasagne.layers.get_output(l_softmax, {l_in2: embg})
        cost = T.nnet.categorical_crossentropy(X, scores)
        prediction = T.argmax(X, axis=1)

        self.network_params = lasagne.layers.get_all_params(
            l_out, trainable=True) + lasagne.layers.get_all_params(
                l_softmax, trainable=True)
        self.network_params.pop(0)
        self.all_params = lasagne.layers.get_all_params(
            l_out, trainable=True) + lasagne.layers.get_all_params(
                l_softmax, trainable=True)

        reg = self.getRegTerm(params, We, initial_We, l_out, l_softmax, p)
        self.trainable = self.getTrainableParams(params)
        cost = T.mean(cost) + reg

        self.feedforward_function = theano.function([g1batchindices, g1mask],
                                                    embg)
        self.scoring_function = theano.function([g1batchindices, g1mask],
                                                prediction)
        self.cost_function = theano.function([scores, g1batchindices, g1mask],
                                             cost)

        grads = theano.gradient.grad(cost, self.trainable)
        if params.clip:
            grads = [
                lasagne.updates.norm_constraint(grad, params.clip,
                                                range(grad.ndim))
                for grad in grads
            ]
        updates = params.learner(grads, self.trainable, params.eta)
        self.train_function = theano.function([scores, g1batchindices, g1mask],
                                              cost,
                                              updates=updates)
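getRegTerm is defined elsewhere in the class and is not shown here. As a rough illustration only, the NumPy sketch below shows the kind of penalty the other examples use to keep the updated embeddings close to the loaded ones (the 0.5 * LW * l2(We - initial_We) pattern); the actual term returned by getRegTerm for the "reg" training mode may differ:

import numpy as np

def reg_to_initial(We, initial_We, LW=1e-6):
    """L2 penalty on the drift of the current embeddings away from their loaded values."""
    return 0.5 * LW * np.sum((We - initial_We) ** 2)

initial_We = np.random.randn(100, 50)
We = initial_We + 0.01 * np.random.randn(100, 50)
print(reg_to_initial(We, initial_We))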
Example #7
    def __init__(self, We_initial, params):

        if params.maxval:
            self.nout = params.maxval - params.minval + 1

        #params
        initial_We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
        We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
        if params.npc > 0:
            pc = theano.shared(np.asarray(params.pc, dtype=config.floatX))

        g1batchindices = T.imatrix()
        g2batchindices = T.imatrix()
        g1mask = T.matrix()
        g2mask = T.matrix()
        scores = T.matrix()

        l_in = lasagne.layers.InputLayer((None, None))
        l_mask = lasagne.layers.InputLayer(shape=(None, None))
        l_emb = lasagne.layers.EmbeddingLayer(
            l_in,
            input_size=We.get_value().shape[0],
            output_size=We.get_value().shape[1],
            W=We)
        l_average = lasagne_average_layer([l_emb, l_mask])
        l_out = lasagne.layers.DenseLayer(l_average,
                                          params.layersize,
                                          nonlinearity=params.nonlinearity)

        embg1 = lasagne.layers.get_output(l_out, {
            l_in: g1batchindices,
            l_mask: g1mask
        })
        embg2 = lasagne.layers.get_output(l_out, {
            l_in: g2batchindices,
            l_mask: g2mask
        })

        if params.npc <= 0:
            print "#pc <=0, do not remove pc"
        elif params.npc == 1:
            print "#pc == 1"
            proj1 = embg1.dot(pc.transpose())
            proj2 = embg2.dot(pc.transpose())
            embg1 = embg1 - theano.tensor.outer(proj1, pc)
            embg2 = embg2 - theano.tensor.outer(proj2, pc)
        else:
            print "#pc > 1"
            proj1 = embg1.dot(pc.transpose())
            proj2 = embg2.dot(pc.transpose())
            embg1 = embg1 - theano.tensor.dot(proj1, pc)
            embg2 = embg2 - theano.tensor.dot(proj2, pc)

        g1_dot_g2 = embg1 * embg2
        g1_abs_g2 = abs(embg1 - embg2)

        lin_dot = lasagne.layers.InputLayer((None, params.layersize))
        lin_abs = lasagne.layers.InputLayer((None, params.layersize))
        l_sum = lasagne.layers.ConcatLayer([lin_dot, lin_abs])
        l_sigmoid = lasagne.layers.DenseLayer(
            l_sum, params.memsize, nonlinearity=lasagne.nonlinearities.sigmoid)

        if params.task == "sim":
            l_softmax = lasagne.layers.DenseLayer(l_sigmoid,
                                                  self.nout,
                                                  nonlinearity=T.nnet.softmax)
            X = lasagne.layers.get_output(l_softmax, {
                lin_dot: g1_dot_g2,
                lin_abs: g1_abs_g2
            })
            Y = T.log(X)

            cost = scores * (T.log(scores) - Y)
            cost = cost.sum(axis=1) / (float(self.nout))

            prediction = 0.
            i = params.minval
            while i <= params.maxval:
                prediction = prediction + i * X[:, i - 1]
                i += 1
        elif params.task == "ent":
            l_softmax = lasagne.layers.DenseLayer(l_sigmoid,
                                                  3,
                                                  nonlinearity=T.nnet.softmax)
            X = lasagne.layers.get_output(l_softmax, {
                lin_dot: g1_dot_g2,
                lin_abs: g1_abs_g2
            })

            cost = theano.tensor.nnet.categorical_crossentropy(X, scores)

            prediction = T.argmax(X, axis=1)
        else:
            raise ValueError('Params.task not set correctly.')

        self.network_params = lasagne.layers.get_all_params(
            l_out, trainable=True) + lasagne.layers.get_all_params(
                l_softmax, trainable=True)
        self.network_params.pop(
            0)  # do not include the word embedding as network parameters
        self.all_params = lasagne.layers.get_all_params(
            l_out, trainable=True) + lasagne.layers.get_all_params(
                l_softmax, trainable=True)

        reg = self.getRegTerm(params, We, initial_We)
        self.trainable = self.getTrainableParams(params)
        cost = T.mean(cost) + reg

        self.feedforward_function = theano.function([g1batchindices, g1mask],
                                                    embg1)
        self.scoring_function = theano.function(
            [g1batchindices, g2batchindices, g1mask, g2mask], prediction)
        self.cost_function = theano.function(
            [scores, g1batchindices, g2batchindices, g1mask, g2mask], cost)

        #updates
        grads = theano.gradient.grad(cost, self.trainable)
        if params.clip:
            grads = [
                lasagne.updates.norm_constraint(grad, params.clip,
                                                range(grad.ndim))
                for grad in grads
            ]
        updates = params.learner(grads, self.trainable, params.eta)
        self.train_function = theano.function(
            [scores, g1batchindices, g2batchindices, g1mask, g2mask],
            cost,
            updates=updates)
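Both the "sim" and "ent" branches above feed the classifier a concatenation of the elementwise product and the absolute difference of the two sentence embeddings. A short NumPy sketch of that feature construction, for reference:

import numpy as np

def pair_features(embg1, embg2):
    """Elementwise product and absolute difference, concatenated, as classifier input."""
    return np.concatenate([embg1 * embg2, np.abs(embg1 - embg2)], axis=1)

embg1, embg2 = np.random.randn(8, 300), np.random.randn(8, 300)
print(pair_features(embg1, embg2).shape)       # (8, 600)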
Example #8
    def __init__(self, We_initial, params):

        #params
        initial_We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
        self.We = theano.shared(np.asarray(We_initial, dtype=config.floatX))

        #symbolic params
        g1batchindices = T.imatrix()
        g2batchindices = T.imatrix()
        g3batchindices = T.imatrix()
        p1batchindices = T.imatrix()
        g1mask = T.matrix()
        g2mask = T.matrix()
        g3mask = T.matrix()
        p1mask = T.matrix()

        #get embeddings
        l_in = lasagne.layers.InputLayer((None, None, 1))
        l_mask = lasagne.layers.InputLayer(shape=(None, None))
        l_emb = lasagne.layers.EmbeddingLayer(
            l_in,
            input_size=self.We.get_value().shape[0],
            output_size=self.We.get_value().shape[1],
            W=self.We)
        l_average = lasagne_average_layer([l_emb, l_mask])

        embg1 = lasagne.layers.get_output(l_average, {
            l_in: g1batchindices,
            l_mask: g1mask
        })
        embg2 = lasagne.layers.get_output(l_average, {
            l_in: g2batchindices,
            l_mask: g2mask
        })
        embg3 = lasagne.layers.get_output(l_average, {
            l_in: g3batchindices,
            l_mask: g3mask
        })
        embp1 = lasagne.layers.get_output(l_average, {
            l_in: p1batchindices,
            l_mask: p1mask
        })

        #objective function
        crt = T.concatenate([embg1, embg2, embg3], axis=1)
        crf = T.concatenate([embg1, embg2, embp1], axis=1)

        l_in2 = lasagne.layers.InputLayer(
            (None, self.We.get_value().shape[1] * 3))
        d1 = lasagne.layers.DenseLayer(
            l_in2,
            self.We.get_value().shape[1],
            nonlinearity=lasagne.nonlinearities.tanh)
        l_sigmoid = lasagne.layers.DenseLayer(
            d1, 1, nonlinearity=lasagne.nonlinearities.sigmoid)
        st = lasagne.layers.get_output(l_sigmoid, {l_in2: crt})
        sf = lasagne.layers.get_output(l_sigmoid, {l_in2: crf})

        cost = params.margin - st + sf
        cost = cost * (cost > 0)

        #self.all_params = lasagne.layers.get_all_params(l_average, trainable=True) + [self.M, self.N]
        self.network_params = lasagne.layers.get_all_params(
            l_average, trainable=True) + lasagne.layers.get_all_params(
                l_sigmoid, trainable=True)
        self.network_params.pop(0)
        self.all_params = lasagne.layers.get_all_params(
            l_average, trainable=True) + lasagne.layers.get_all_params(
                l_sigmoid, trainable=True)
        l2 = 0.5 * params.LC * sum(
            lasagne.regularization.l2(x) for x in self.network_params)
        #word_reg = 0.5*params.LW*lasagne.regularization.l2(self.We-initial_We) + 0.5*params.LC*self.M.norm(2) + self.N.norm(2)
        word_reg = 0.5 * params.LW * lasagne.regularization.l2(self.We -
                                                               initial_We)
        cost = T.mean(cost) + word_reg + l2

        #feedforward
        self.feedforward_function = theano.function([g1batchindices, g1mask],
                                                    embg1)
        self.cost_function = theano.function([
            g1batchindices, g2batchindices, g3batchindices, p1batchindices,
            g1mask, g2mask, g3mask, p1mask
        ],
                                             cost,
                                             on_unused_input='warn')

        prediction = st * 4 + 1

        self.scoring_function = theano.function([
            g1batchindices, g2batchindices, g3batchindices, g1mask, g2mask,
            g3mask
        ],
                                                prediction,
                                                on_unused_input='warn')

        #updates
        grads = theano.gradient.grad(cost, self.all_params)
        if params.clip:
            grads = [
                lasagne.updates.norm_constraint(grad, params.clip,
                                                range(grad.ndim))
                for grad in grads
            ]
        updates = params.learner(grads, self.all_params, params.eta)
        self.train_function = theano.function([
            g1batchindices, g2batchindices, g3batchindices, p1batchindices,
            g1mask, g2mask, g3mask, p1mask
        ],
                                              cost,
                                              updates=updates,
                                              on_unused_input='warn')
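This example scores a concatenation of three sentence embeddings with a small feedforward net, trains it with a hinge loss against a corrupted triple, and maps the sigmoid output to a score via st * 4 + 1. A toy NumPy sketch of the hinge cost and the score mapping, using an illustrative margin of 0.4 (the real value is params.margin):

import numpy as np

def triple_hinge(st, sf, margin=0.4):
    """Hinge loss pushing the true triple's score above the corrupted triple's score."""
    return np.maximum(0.0, margin - st + sf).mean()

st = np.array([0.9, 0.6])       # sigmoid scores of the true triples
sf = np.array([0.2, 0.7])       # sigmoid scores of the corrupted triples
print(triple_hinge(st, sf))
print(st * 4 + 1)               # map a sigmoid output onto a 1-5 prediction scale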
Example #9
    def __init__(self, We_initial, params):

        initial_We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
        We = theano.shared(np.asarray(We_initial, dtype=config.floatX))

        self.dropout = params.dropout
        self.word_dropout = params.word_dropout

        g1batchindices = T.imatrix()
        g2batchindices = T.imatrix()
        p1batchindices = T.imatrix()
        p2batchindices = T.imatrix()
        g1mask = T.matrix()
        g2mask = T.matrix()
        p1mask = T.matrix()
        p2mask = T.matrix()

        l_in = lasagne.layers.InputLayer((None, None))
        l_mask = lasagne.layers.InputLayer(shape=(None, None))
        l_emb = lasagne.layers.EmbeddingLayer(
            l_in,
            input_size=We.get_value().shape[0],
            output_size=We.get_value().shape[1],
            W=We)

        if params.dropout > 0:
            l_emb = lasagne.layers.DropoutLayer(l_emb, params.dropout)
        elif params.word_dropout > 0:
            l_emb = lasagne.layers.DropoutLayer(l_emb,
                                                params.word_dropout,
                                                shared_axes=(2, ))

        if params.model == "lstm":
            if params.outgate:
                l_lstm = lasagne.layers.LSTMLayer(l_emb,
                                                  params.dim,
                                                  peepholes=True,
                                                  learn_init=False,
                                                  mask_input=l_mask)
            else:
                l_lstm = lasagne_lstm_nooutput_layer(l_emb,
                                                     params.dim,
                                                     peepholes=True,
                                                     learn_init=False,
                                                     mask_input=l_mask)
            l_out = lasagne.layers.SliceLayer(l_lstm, -1, 1)

        elif params.model == "bilstm":
            if params.outgate:
                l_rnn = lasagne.layers.LSTMLayer(l_emb,
                                                 params.dim,
                                                 learn_init=False,
                                                 mask_input=l_mask)
                l_rnnb = lasagne.layers.LSTMLayer(l_emb,
                                                  params.dim,
                                                  learn_init=False,
                                                  mask_input=l_mask,
                                                  backwards=True)
            else:
                l_rnn = lasagne_lstm_nooutput_layer(l_emb,
                                                    params.dim,
                                                    learn_init=False,
                                                    mask_input=l_mask)
                l_rnnb = lasagne_lstm_nooutput_layer(l_emb,
                                                     params.dim,
                                                     learn_init=False,
                                                     mask_input=l_mask,
                                                     backwards=True)

            if not params.sumlayer:
                l_outf = lasagne.layers.SliceLayer(l_rnn, -1, 1)
                l_outb = lasagne.layers.SliceLayer(l_rnnb, -1, 1)

                l_concat = lasagne.layers.ConcatLayer([l_outf, l_outb], axis=1)
                l_out = lasagne.layers.DenseLayer(
                    l_concat,
                    params.dim,
                    nonlinearity=lasagne.nonlinearities.tanh)
            else:
                l_out = lasagne_sum_layer([l_rnn, l_rnnb])
                l_out = lasagne_average_layer([l_out, l_mask], tosum=False)

        elif params.model == "lstmavg":
            if params.outgate:
                l_lstm = lasagne.layers.LSTMLayer(l_emb,
                                                  params.dim,
                                                  peepholes=True,
                                                  learn_init=False,
                                                  mask_input=l_mask)
            else:
                l_lstm = lasagne_lstm_nooutput_layer(l_emb,
                                                     params.dim,
                                                     peepholes=True,
                                                     learn_init=False,
                                                     mask_input=l_mask)

            l_out = lasagne_average_layer([l_lstm, l_mask], tosum=False)

        elif params.model == "bilstmavg":
            if params.outgate:
                l_rnn = lasagne.layers.LSTMLayer(l_emb,
                                                 params.dim,
                                                 learn_init=False,
                                                 mask_input=l_mask)
                l_rnnb = lasagne.layers.LSTMLayer(l_emb,
                                                  params.dim,
                                                  learn_init=False,
                                                  mask_input=l_mask,
                                                  backwards=True)
            else:
                l_rnn = lasagne_lstm_nooutput_layer(l_emb,
                                                    params.dim,
                                                    learn_init=False,
                                                    mask_input=l_mask)
                l_rnnb = lasagne_lstm_nooutput_layer(l_emb,
                                                     params.dim,
                                                     learn_init=False,
                                                     mask_input=l_mask,
                                                     backwards=True)

            if not params.sumlayer:
                l_concat = lasagne.layers.ConcatLayer([l_rnn, l_rnnb], axis=2)
                l_out = lasagne.layers.DenseLayer(
                    l_concat,
                    params.dim,
                    num_leading_axes=-1,
                    nonlinearity=lasagne.nonlinearities.tanh)
                l_out = lasagne_average_layer([l_out, l_mask], tosum=False)
            else:
                l_out = lasagne_sum_layer([l_rnn, l_rnnb])
                l_out = lasagne_average_layer([l_out, l_mask], tosum=False)

        elif params.model == "gran":
            if params.outgate:
                l_lstm = lasagne_gran_layer(l_emb,
                                            params.dim,
                                            peepholes=True,
                                            learn_init=False,
                                            mask_input=l_mask,
                                            gran_type=params.gran_type)
            else:
                l_lstm = lasagne_gran_layer_nooutput_layer(
                    l_emb,
                    params.dim,
                    peepholes=True,
                    learn_init=False,
                    mask_input=l_mask,
                    gran_type=params.gran_type)

            if params.gran_type == 1 or params.gran_type == 2:
                l_out = lasagne_average_layer([l_lstm, l_mask], tosum=False)
            else:
                l_out = lasagne.layers.SliceLayer(l_lstm, -1, 1)

        elif params.model == "bigran":
            if params.outgate:
                l_lstm = lasagne_gran_layer(l_emb,
                                            params.dim,
                                            peepholes=True,
                                            learn_init=False,
                                            mask_input=l_mask,
                                            gran_type=params.gran_type)
                l_lstmb = lasagne_gran_layer(l_emb,
                                             params.dim,
                                             peepholes=True,
                                             learn_init=False,
                                             mask_input=l_mask,
                                             backwards=True)
            else:
                l_lstm = lasagne_gran_layer_nooutput_layer(
                    l_emb,
                    params.dim,
                    peepholes=True,
                    learn_init=False,
                    mask_input=l_mask,
                    gran_type=params.gran_type)
                l_lstmb = lasagne_gran_layer_nooutput_layer(l_emb,
                                                            params.dim,
                                                            peepholes=True,
                                                            learn_init=False,
                                                            mask_input=l_mask,
                                                            backwards=True)

            if not params.sumlayer:
                l_concat = lasagne.layers.ConcatLayer([l_lstm, l_lstmb],
                                                      axis=2)
                l_out = lasagne.layers.DenseLayer(
                    l_concat,
                    params.dim,
                    num_leading_axes=-1,
                    nonlinearity=lasagne.nonlinearities.tanh)
                l_out = lasagne_average_layer([l_out, l_mask], tosum=False)
            else:
                l_out = lasagne_sum_layer([l_lstm, l_lstmb])
                l_out = lasagne_average_layer([l_out, l_mask], tosum=False)

        elif params.model == "wordaverage":
            l_out = lasagne_average_layer([l_emb, l_mask], tosum=False)

        else:
            print "Invalid model specified. Exiting."
            sys.exit(0)

        self.final_layer = l_out

        embg1 = lasagne.layers.get_output(l_out, {
            l_in: g1batchindices,
            l_mask: g1mask
        },
                                          deterministic=False)
        embg2 = lasagne.layers.get_output(l_out, {
            l_in: g2batchindices,
            l_mask: g2mask
        },
                                          deterministic=False)
        embp1 = lasagne.layers.get_output(l_out, {
            l_in: p1batchindices,
            l_mask: p1mask
        },
                                          deterministic=False)
        embp2 = lasagne.layers.get_output(l_out, {
            l_in: p2batchindices,
            l_mask: p2mask
        },
                                          deterministic=False)

        t_embg1 = lasagne.layers.get_output(l_out, {
            l_in: g1batchindices,
            l_mask: g1mask
        },
                                            deterministic=True)
        t_embg2 = lasagne.layers.get_output(l_out, {
            l_in: g2batchindices,
            l_mask: g2mask
        },
                                            deterministic=True)

        def fix(x):
            return x * (x > 0) + 1E-10 * (x <= 0)

        #objective function
        g1g2 = (embg1 * embg2).sum(axis=1)
        g1g2norm = T.sqrt(fix(T.sum(embg1**2, axis=1))) * T.sqrt(
            fix(T.sum(embg2**2, axis=1)))
        g1g2 = g1g2 / g1g2norm

        p1g1 = (embp1 * embg1).sum(axis=1)
        p1g1norm = T.sqrt(fix(T.sum(embp1**2, axis=1))) * T.sqrt(
            fix(T.sum(embg1**2, axis=1)))
        p1g1 = p1g1 / p1g1norm

        p2g2 = (embp2 * embg2).sum(axis=1)
        p2g2norm = T.sqrt(fix(T.sum(embp2**2, axis=1))) * T.sqrt(
            fix(T.sum(embg2**2, axis=1)))
        p2g2 = p2g2 / p2g2norm

        costp1g1 = params.margin - g1g2 + p1g1
        costp1g1 = costp1g1 * (costp1g1 > 0)

        costp2g2 = params.margin - g1g2 + p2g2
        costp2g2 = costp2g2 * (costp2g2 > 0)

        cost = costp1g1 + costp2g2
        network_params = lasagne.layers.get_all_params(l_out, trainable=True)
        network_params.pop(0)
        self.all_params = lasagne.layers.get_all_params(l_out, trainable=True)
        self.layer = l_out
        print(self.all_params)

        #regularization
        l2 = 0.5 * params.LC * sum(
            lasagne.regularization.l2(x) for x in network_params)
        word_reg = 0.5 * params.LW * lasagne.regularization.l2(We - initial_We)
        cost = T.mean(cost) + l2 + word_reg

        g1g2 = (t_embg1 * t_embg2).sum(axis=1)
        g1g2norm = T.sqrt(T.sum(t_embg1**2, axis=1)) * T.sqrt(
            T.sum(t_embg2**2, axis=1))
        g1g2 = g1g2 / g1g2norm
        self.feedforward_function = theano.function([g1batchindices, g1mask],
                                                    t_embg1)
        prediction = g1g2
        self.scoring_function = theano.function(
            [g1batchindices, g2batchindices, g1mask, g2mask], prediction)

        grads = theano.gradient.grad(cost, self.all_params)
        updates = params.learner(grads, self.all_params, params.eta)

        self.train_function = theano.function([
            g1batchindices, g2batchindices, p1batchindices, p2batchindices,
            g1mask, g2mask, p1mask, p2mask
        ],
                                              cost,
                                              updates=updates)

        cost = costp1g1 + costp2g2
        cost = T.mean(cost)
        self.cost_function = theano.function([
            g1batchindices, g2batchindices, p1batchindices, p2batchindices,
            g1mask, g2mask, p1mask, p2mask
        ], cost)

        print "Num Params:", lasagne.layers.count_params(self.final_layer)
Example #10
    def __init__(self, We_initial, params):

        #params
        initial_We = theano.shared(np.asarray(We_initial, dtype = config.floatX))
        We = theano.shared(np.asarray(We_initial, dtype = config.floatX))

        #symbolic params
        g1batchindices = T.imatrix(); g2batchindices = T.imatrix()
        p1batchindices = T.imatrix(); p2batchindices = T.imatrix()
        g1mask = T.matrix(); g2mask = T.matrix()
        p1mask = T.matrix(); p2mask = T.matrix()

        #get embeddings
        l_in = lasagne.layers.InputLayer((None, None, 1))
        l_mask = lasagne.layers.InputLayer(shape=(None, None))
        l_emb = lasagne.layers.EmbeddingLayer(l_in, input_size=We.get_value().shape[0], output_size=We.get_value().shape[1], W=We)
        l_average = lasagne_average_layer([l_emb, l_mask])

        embg1 = lasagne.layers.get_output(l_average, {l_in:g1batchindices, l_mask:g1mask})
        embg2 = lasagne.layers.get_output(l_average, {l_in:g2batchindices, l_mask:g2mask})
        embp1 = lasagne.layers.get_output(l_average, {l_in:p1batchindices, l_mask:p1mask})
        embp2 = lasagne.layers.get_output(l_average, {l_in:p2batchindices, l_mask:p2mask})

        #objective function
        g1g2 = (embg1*embg2).sum(axis=1)
        g1g2norm = T.sqrt(T.sum(embg1**2,axis=1)) * T.sqrt(T.sum(embg2**2,axis=1))
        g1g2 = g1g2 / g1g2norm

        p1g1 = (embp1*embg1).sum(axis=1)
        p1g1norm = T.sqrt(T.sum(embp1**2,axis=1)) * T.sqrt(T.sum(embg1**2,axis=1))
        p1g1 = p1g1 / p1g1norm

        p2g2 = (embp2*embg2).sum(axis=1)
        p2g2norm = T.sqrt(T.sum(embp2**2,axis=1)) * T.sqrt(T.sum(embg2**2,axis=1))
        p2g2 = p2g2 / p2g2norm

        costp1g1 = params.margin - g1g2 + p1g1
        costp1g1 = costp1g1*(costp1g1 > 0)

        costp2g2 = params.margin - g1g2 + p2g2
        costp2g2 = costp2g2*(costp2g2 > 0)

        cost = costp1g1 + costp2g2

        self.all_params = lasagne.layers.get_all_params(l_average, trainable=True)

        word_reg = 0.5*params.LW*lasagne.regularization.l2(We-initial_We)
        cost = T.mean(cost) + word_reg

        #feedforward
        self.feedforward_function = theano.function([g1batchindices,g1mask], embg1)
        self.cost_function = theano.function([g1batchindices, g2batchindices, p1batchindices, p2batchindices,
                             g1mask, g2mask, p1mask, p2mask], cost)

        prediction = g1g2

        self.scoring_function = theano.function([g1batchindices, g2batchindices,
                             g1mask, g2mask],prediction)

        #updates
        grads = theano.gradient.grad(cost, self.all_params)
        if params.clip:
            grads = [lasagne.updates.norm_constraint(grad, params.clip, range(grad.ndim)) for grad in grads]
        updates = params.learner(grads, self.all_params, params.eta)
        self.train_function = theano.function([g1batchindices, g2batchindices, p1batchindices, p2batchindices,
                             g1mask, g2mask, p1mask, p2mask], cost, updates=updates)
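Once compiled, the Theano functions in these examples take int32 index matrices and float32 masks. A hypothetical calling sketch; the token indices, padding value, and the model handle are invented for illustration:

import numpy as np

# two sentences, padded to length 4 with index 0; masks mark the real tokens
g1batchindices = np.array([[5, 12, 7, 0], [3, 9, 0, 0]], dtype='int32')
g1mask = np.array([[1, 1, 1, 0], [1, 1, 0, 0]], dtype='float32')

# emb = model.feedforward_function(g1batchindices, g1mask)                       # (2, dim) averaged embeddings
# sims = model.scoring_function(g1batchindices, g2batchindices, g1mask, g2mask)  # cosine similarity per pair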
Example #11
    def __init__(self, We_initial, params):

        initial_We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
        We = theano.shared(np.asarray(We_initial, dtype=config.floatX))

        g1batchindices = T.imatrix()
        g2batchindices = T.imatrix()
        p1batchindices = T.imatrix()
        p2batchindices = T.imatrix()
        g1mask = T.matrix()
        g2mask = T.matrix()
        p1mask = T.matrix()
        p2mask = T.matrix()

        l_in = lasagne.layers.InputLayer((None, None))
        l_mask = lasagne.layers.InputLayer(shape=(None, None))
        l_emb = lasagne.layers.EmbeddingLayer(
            l_in,
            input_size=We.get_value().shape[0],
            output_size=We.get_value().shape[1],
            W=We)
        l_average = lasagne_average_layer([l_emb, l_mask])
        l_1 = lasagne.layers.DenseLayer(l_average,
                                        params.hiddensize,
                                        W=lasagne.init.Normal(0.1),
                                        b=lasagne.init.Constant(0.),
                                        nonlinearity=params.nonlinearity)
        l_2 = lasagne.layers.DenseLayer(l_1,
                                        params.hiddensize,
                                        nonlinearity=params.nonlinearity)
        l_3 = lasagne.layers.DenseLayer(l_2,
                                        params.hiddensize,
                                        nonlinearity=params.nonlinearity)
        l_4 = lasagne.layers.DenseLayer(l_3,
                                        params.hiddensize,
                                        nonlinearity=params.nonlinearity)

        l_end = None
        if params.numlayers == 1:
            l_end = l_1
        elif params.numlayers == 2:
            l_end = l_2
        elif params.numlayers == 3:
            l_end = l_3
        elif params.numlayers == 4:
            l_end = l_4
        else:
            raise ValueError('Only 1-4 layers are supported currently.')

        embg1 = lasagne.layers.get_output(l_end, {
            l_in: g1batchindices,
            l_mask: g1mask
        })
        embg2 = lasagne.layers.get_output(l_end, {
            l_in: g2batchindices,
            l_mask: g2mask
        })
        embp1 = lasagne.layers.get_output(l_end, {
            l_in: p1batchindices,
            l_mask: p1mask
        })
        embp2 = lasagne.layers.get_output(l_end, {
            l_in: p2batchindices,
            l_mask: p2mask
        })

        g1g2 = (embg1 * embg2).sum(axis=1)
        g1g2norm = T.sqrt(T.sum(embg1**2, axis=1)) * T.sqrt(
            T.sum(embg2**2, axis=1))
        g1g2 = g1g2 / g1g2norm

        p1g1 = (embp1 * embg1).sum(axis=1)
        p1g1norm = T.sqrt(T.sum(embp1**2, axis=1)) * T.sqrt(
            T.sum(embg1**2, axis=1))
        p1g1 = p1g1 / p1g1norm

        p2g2 = (embp2 * embg2).sum(axis=1)
        p2g2norm = T.sqrt(T.sum(embp2**2, axis=1)) * T.sqrt(
            T.sum(embg2**2, axis=1))
        p2g2 = p2g2 / p2g2norm

        costp1g1 = params.margin - g1g2 + p1g1
        costp1g1 = costp1g1 * (costp1g1 > 0)

        costp2g2 = params.margin - g1g2 + p2g2
        costp2g2 = costp2g2 * (costp2g2 > 0)

        cost = costp1g1 + costp2g2
        network_params = lasagne.layers.get_all_params(l_end, trainable=True)
        network_params.pop(0)
        self.all_params = lasagne.layers.get_all_params(l_end, trainable=True)

        #regularization
        l2 = 0.5 * params.LC * sum(
            lasagne.regularization.l2(x) for x in network_params)
        if params.updatewords:
            word_reg = 0.5 * params.LW * lasagne.regularization.l2(We -
                                                                   initial_We)
            cost = T.mean(cost) + l2 + word_reg
        else:
            cost = T.mean(cost) + l2

        self.feedforward_function = theano.function([g1batchindices, g1mask],
                                                    embg1)
        self.cost_function = theano.function([
            g1batchindices, g2batchindices, p1batchindices, p2batchindices,
            g1mask, g2mask, p1mask, p2mask
        ], cost)

        prediction = g1g2

        self.scoring_function = theano.function(
            [g1batchindices, g2batchindices, g1mask, g2mask], prediction)

        self.train_function = None
        if params.updatewords:
            grads = theano.gradient.grad(cost, self.all_params)
            if params.clip:
                grads = [
                    lasagne.updates.norm_constraint(grad, params.clip,
                                                    range(grad.ndim))
                    for grad in grads
                ]
            updates = params.learner(grads, self.all_params, params.eta)
            self.train_function = theano.function([
                g1batchindices, g2batchindices, p1batchindices, p2batchindices,
                g1mask, g2mask, p1mask, p2mask
            ],
                                                  cost,
                                                  updates=updates)
        else:
            self.all_params = network_params
            grads = theano.gradient.grad(cost, self.all_params)
            if params.clip:
                grads = [
                    lasagne.updates.norm_constraint(grad, params.clip,
                                                    range(grad.ndim))
                    for grad in grads
                ]
            updates = params.learner(grads, self.all_params, params.eta)
            self.train_function = theano.function([
                g1batchindices, g2batchindices, p1batchindices, p2batchindices,
                g1mask, g2mask, p1mask, p2mask
            ],
                                                  cost,
                                                  updates=updates)
Example #12
    def __init__(self,
                 We_initial,
                 regfile=None,
                 layersize=300,
                 num_filters=4,
                 filter_size=11,
                 margin=0.4,
                 LC=1e-6,
                 LW=1e-6,
                 updatewords=True,
                 clip=1.0,
                 eta=0.025):

        initial_We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
        We = theano.shared(np.asarray(We_initial, dtype=config.floatX))

        g1batchindices = T.imatrix()
        g2batchindices = T.imatrix()
        p1batchindices = T.imatrix()
        p2batchindices = T.imatrix()
        g1mask = T.matrix()
        g2mask = T.matrix()
        p1mask = T.matrix()
        p2mask = T.matrix()

        l_in = lasagne.layers.InputLayer((None, None))
        l_mask = lasagne.layers.InputLayer(shape=(None, None))
        if regfile is None:
            l_emb = lasagne.layers.EmbeddingLayer(
                l_in,
                input_size=We.get_value().shape[0],
                output_size=We.get_value().shape[1],
                W=We)
            l_average = lasagne_average_layer([l_emb, l_mask])
            l_reshape = lasagne.layers.ReshapeLayer(l_average, ([0], 1, -1))
            l_conv = lasagne.layers.Conv1DLayer(
                l_reshape,
                num_filters=num_filters,
                filter_size=filter_size,
                stride=1,
                pad=filter_size // 2,
                nonlinearity=lasagne.nonlinearities.rectify)
            l_pooling = lasagne.layers.MaxPool1DLayer(l_conv, pool_size=2)
            # (batch_size, params.layersize)
            l_proj = lasagne.layers.DenseLayer(
                l_pooling, layersize, nonlinearity=lasagne.nonlinearities.tanh)
        else:
            logging.debug("loading regfile: {}".format(regfile))
            p = cPickle.load(open(regfile, 'rb'))

            We = theano.shared(
                np.asarray(p[0].get_value(), dtype=config.floatX))
            W_conv = np.asarray(p[1].get_value(), dtype=config.floatX)
            b_conv = np.asarray(p[2].get_value(), dtype=config.floatX)
            W_proj = np.asarray(p[3].get_value(), dtype=config.floatX)
            b_proj = np.asarray(p[4].get_value(), dtype=config.floatX)

            l_emb = lasagne.layers.EmbeddingLayer(
                l_in,
                input_size=We.get_value().shape[0],
                output_size=We.get_value().shape[1],
                W=We)
            l_average = lasagne_average_layer([l_emb, l_mask])
            l_reshape = lasagne.layers.ReshapeLayer(l_average, ([0], 1, -1))
            l_conv = lasagne.layers.Conv1DLayer(
                l_reshape,
                num_filters=num_filters,
                filter_size=filter_size,
                stride=1,
                pad=filter_size // 2,
                nonlinearity=lasagne.nonlinearities.rectify,
                W=W_conv,
                b=b_conv)
            l_pooling = lasagne.layers.MaxPool1DLayer(l_conv, pool_size=2)
            # (batch_size, params.layersize)
            l_proj = lasagne.layers.DenseLayer(
                l_pooling,
                layersize,
                nonlinearity=lasagne.nonlinearities.tanh,
                W=W_proj,
                b=b_proj)

        embg1 = lasagne.layers.get_output(l_proj, {
            l_in: g1batchindices,
            l_mask: g1mask
        })
        embg2 = lasagne.layers.get_output(l_proj, {
            l_in: g2batchindices,
            l_mask: g2mask
        })
        embp1 = lasagne.layers.get_output(l_proj, {
            l_in: p1batchindices,
            l_mask: p1mask
        })
        embp2 = lasagne.layers.get_output(l_proj, {
            l_in: p2batchindices,
            l_mask: p2mask
        })

        g1g2 = (embg1 * embg2).sum(axis=1)
        g1g2norm = T.sqrt(T.sum(embg1**2, axis=1)) * T.sqrt(
            T.sum(embg2**2, axis=1))
        g1g2 = g1g2 / g1g2norm

        p1g1 = (embp1 * embg1).sum(axis=1)
        p1g1norm = T.sqrt(T.sum(embp1**2, axis=1)) * T.sqrt(
            T.sum(embg1**2, axis=1))
        p1g1 = p1g1 / p1g1norm

        p2g2 = (embp2 * embg2).sum(axis=1)
        p2g2norm = T.sqrt(T.sum(embp2**2, axis=1)) * T.sqrt(
            T.sum(embg2**2, axis=1))
        p2g2 = p2g2 / p2g2norm

        costp1g1 = margin - g1g2 + p1g1
        costp1g1 = costp1g1 * (costp1g1 > 0)

        costp2g2 = margin - g1g2 + p2g2
        costp2g2 = costp2g2 * (costp2g2 > 0)

        cost = costp1g1 + costp2g2
        network_params = lasagne.layers.get_all_params(l_proj, trainable=True)
        network_params.pop(0)
        self.all_params = lasagne.layers.get_all_params(l_proj, trainable=True)

        l2 = 0.5 * LC * sum(
            lasagne.regularization.l2(x) for x in network_params)
        if updatewords:
            word_reg = 0.5 * LW * lasagne.regularization.l2(We - initial_We)
            cost = T.mean(cost) + l2 + word_reg
        else:
            cost = T.mean(cost) + l2

        self.feedforward_function = theano.function([g1batchindices, g1mask],
                                                    embg1)
        self.cost_function = theano.function([
            g1batchindices, g2batchindices, p1batchindices, p2batchindices,
            g1mask, g2mask, p1mask, p2mask
        ], cost)

        prediction = g1g2

        self.scoring_function = theano.function(
            [g1batchindices, g2batchindices, g1mask, g2mask], prediction)

        self.train_function = None
        if updatewords:
            grads = theano.gradient.grad(cost, self.all_params)
            if clip:
                grads = [
                    lasagne.updates.norm_constraint(grad, clip,
                                                    range(grad.ndim))
                    for grad in grads
                ]
            updates = lasagne.updates.adam(grads, self.all_params, eta)
            self.train_function = theano.function([
                g1batchindices, g2batchindices, p1batchindices, p2batchindices,
                g1mask, g2mask, p1mask, p2mask
            ],
                                                  cost,
                                                  updates=updates)
        else:
            self.all_params = network_params
            grads = theano.gradient.grad(cost, self.all_params)
            if clip:
                grads = [
                    lasagne.updates.norm_constraint(grad, clip,
                                                    range(grad.ndim))
                    for grad in grads
                ]
            updates = lasagne.updates.adam(grads, self.all_params, eta)
            self.train_function = theano.function([
                g1batchindices, g2batchindices, p1batchindices, p2batchindices,
                g1mask, g2mask, p1mask, p2mask
            ],
                                                  cost,
                                                  updates=updates)
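# --- Illustration (not part of the original snippet) ---
# A sketch of the inputs the feedforward/scoring functions above expect: an int32
# matrix of word indices padded to the longest sentence in the batch, plus a
# float32 mask with 1.0 for real tokens and 0.0 for padding. The toy index
# sequences (and the `model` handle in the last comment) are assumptions.
import numpy as np

def prepare_batch(index_seqs):
    # Pad variable-length index sequences and build the matching mask.
    maxlen = max(len(s) for s in index_seqs)
    idxs = np.zeros((len(index_seqs), maxlen), dtype='int32')
    mask = np.zeros((len(index_seqs), maxlen), dtype='float32')
    for i, seq in enumerate(index_seqs):
        idxs[i, :len(seq)] = seq
        mask[i, :len(seq)] = 1.0
    return idxs, mask

g1batch, g1mask = prepare_batch([[4, 7, 2], [9, 1]])
# model.feedforward_function(g1batch, g1mask) would then return one embedding per row.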
    def __init__(self, We_initial, params):

        #params
        initial_We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
        self.We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
        self.M = theano.shared(np.eye(self.We.get_value().shape[1]).astype(
            theano.config.floatX),
                               borrow=True)
        #self.N = theano.shared (np.eye (self.We.get_value().shape[1]).astype (theano.config.floatX), borrow=True)

        #symbolic params
        g1batchindices = T.imatrix()
        g2batchindices = T.imatrix()
        g3batchindices = T.imatrix()
        p1batchindices = T.imatrix()
        g1mask = T.matrix()
        g2mask = T.matrix()
        g3mask = T.matrix()
        p1mask = T.matrix()
        max_gg = T.scalar()
        min_gg = T.scalar()

        #get embeddings
        l_in = lasagne.layers.InputLayer((None, None, 1))
        l_mask = lasagne.layers.InputLayer(shape=(None, None))
        l_emb = lasagne.layers.EmbeddingLayer(
            l_in,
            input_size=self.We.get_value().shape[0],
            output_size=self.We.get_value().shape[1],
            W=self.We)
        l_average = lasagne_average_layer([l_emb, l_mask])

        rt = lasagne.layers.get_output(l_emb, {
            l_in: g2batchindices,
            l_mask: g2mask
        })
        rm = lasagne.layers.get_output(l_emb, {
            l_in: g3batchindices,
            l_mask: g3mask
        })
        pt = T.max(rt, axis=1)
        pm = T.max(rm, axis=1)
        nt = T.min(rt, axis=1)
        nm = T.min(rm, axis=1)
        wt = T.concatenate([pt, nt], axis=1)
        wm = T.concatenate([pm, nm], axis=1)
        g1g2 = (wt * wm).sum(axis=1)
        g1g2norm = T.sqrt(T.sum(wt**2, axis=1)) * T.sqrt(T.sum(wm**2, axis=1))
        g1g2 = g1g2 / g1g2norm
        max_g = (max_gg >= T.max(g1g2)) * max_gg + (max_gg <
                                                    T.max(g1g2)) * T.max(g1g2)
        min_g = (min_gg <= T.min(g1g2)) * min_gg + (min_gg >
                                                    T.min(g1g2)) * T.min(g1g2)
        g1g2 = (g1g2 - min_g) / (max_g - min_g)

        embg1 = lasagne.layers.get_output(l_average, {
            l_in: g1batchindices,
            l_mask: g1mask
        })
        embg2 = lasagne.layers.get_output(l_average, {
            l_in: g2batchindices,
            l_mask: g2mask
        })
        embg3 = lasagne.layers.get_output(l_average, {
            l_in: g3batchindices,
            l_mask: g3mask
        })
        embp1 = lasagne.layers.get_output(l_average, {
            l_in: p1batchindices,
            l_mask: p1mask
        })

        #objective function
        crt = T.nnet.sigmoid(
            T.sum(embg1 * T.dot(embg3, self.M),
                  axis=1))  #+ T.sum(embg2 * T.dot(embg3, self.N), axis=1))
        crf = T.nnet.sigmoid(
            T.sum(embg1 * T.dot(embp1, self.M),
                  axis=1))  #+ T.sum(embg2 * T.dot(embp1, self.N), axis=1))

        cost = params.margin - crt + crf
        cost = cost * (cost > 0)

        #self.all_params = lasagne.layers.get_all_params(l_average, trainable=True) + [self.M, self.N]
        self.all_params = lasagne.layers.get_all_params(
            l_average, trainable=True) + [self.M]

        #word_reg = 0.5*params.LW*lasagne.regularization.l2(self.We-initial_We) + 0.5*params.LC*self.M.norm(2) + self.N.norm(2)
        word_reg = 0.5 * params.LW * lasagne.regularization.l2(
            self.We - initial_We) + 0.5 * params.LC * self.M.norm(2)
        cost = T.mean(cost) + word_reg

        #feedforward
        self.feedforward_function = theano.function([g1batchindices, g1mask],
                                                    embg1)
        self.cost_function = theano.function([
            g1batchindices, g2batchindices, g3batchindices, p1batchindices,
            g1mask, g2mask, g3mask, p1mask
        ],
                                             cost,
                                             on_unused_input='warn')

        prediction = (crt + g1g2) * 2 + 1

        self.scoring_function = theano.function([
            g1batchindices, g2batchindices, g3batchindices, g1mask, g2mask,
            g3mask, max_gg, min_gg
        ],
                                                prediction,
                                                on_unused_input='warn')

        #updates
        grads = theano.gradient.grad(cost, self.all_params)
        if params.clip:
            grads = [
                lasagne.updates.norm_constraint(grad, params.clip,
                                                range(grad.ndim))
                for grad in grads
            ]
        updates = params.learner(grads, self.all_params, params.eta)
        self.train_function = theano.function([
            g1batchindices, g2batchindices, g3batchindices, p1batchindices,
            g1mask, g2mask, g3mask, p1mask, max_gg, min_gg
        ], [cost, max_g, min_g],
                                              updates=updates,
                                              on_unused_input='warn')
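# --- Illustration (not part of the original snippet) ---
# A numpy sketch of the bilinear scoring used above:
# crt = sigmoid(sum(embg1 * dot(embg3, M), axis=1)), with M initialised to the
# identity so the score starts out as a plain dot product. Shapes and values are
# assumptions for demonstration only.
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

dim = 4
M = np.eye(dim, dtype='float32')                  # mirrors self.M above
rng = np.random.RandomState(0)
embg1 = rng.randn(2, dim).astype('float32')
embg3 = rng.randn(2, dim).astype('float32')
crt = sigmoid(np.sum(embg1 * embg3.dot(M), axis=1))
print(crt)                                        # one score per example in the batch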
    def __init__(self, We_initial, Wc_initial, params):

        initial_We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
        We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
        initial_Wc = theano.shared(np.asarray(Wc_initial, dtype=config.floatX))
        Wc = theano.shared(np.asarray(Wc_initial, dtype=config.floatX))

        g1batchindices = T.imatrix()
        char_g1batchindices = T.itensor3()
        g2batchindices = T.imatrix()
        char_g2batchindices = T.itensor3()
        p1batchindices = T.imatrix()
        char_p1batchindices = T.itensor3()
        p2batchindices = T.imatrix()
        char_p2batchindices = T.itensor3()
        g1mask = T.matrix()
        char_g1mask = T.tensor3()
        g2mask = T.matrix()
        char_g2mask = T.tensor3()
        p1mask = T.matrix()
        char_p1mask = T.tensor3()
        p2mask = T.matrix()
        char_p2mask = T.tensor3()

        l_in = lasagne.layers.InputLayer((None, None))
        l_char_in = lasagne.layers.InputLayer((None, None, None))
        l_mask = lasagne.layers.InputLayer(shape=(None, None))
        l_char_mask = lasagne.layers.InputLayer(shape=(None, None, None))
        l_emb = lasagne.layers.EmbeddingLayer(
            l_in,
            input_size=We.get_value().shape[0],
            output_size=We.get_value().shape[1],
            W=We)
        l_char_emb = lasagne.layers.EmbeddingLayer(
            l_char_in,
            input_size=Wc.get_value().shape[0],
            output_size=Wc.get_value().shape[1],
            W=Wc)  #50*6*4*300

        char_embg11 = lasagne.layers.get_output(
            l_char_emb, {l_char_in: char_g1batchindices})
        word_embg11 = lasagne.layers.get_output(l_emb, {l_in: g1batchindices})
        self.char_embg_function = theano.function([char_g1batchindices],
                                                  char_embg11)
        self.word_embg_function = theano.function([g1batchindices],
                                                  word_embg11)
        #char_embg1 = lasagne.layers.get_output(l_char_emb, {l_char_in: char_g1batchindices})
        #self.char_representation_function = theano.function([char_g1batchindices], char_embg1)
        if params.nntype == 'word_char1':
            l_word_representation = word_representation_layer1(
                [l_emb, l_char_emb, l_char_mask])  #lasagne.nonlinearities.tanh
        elif params.nntype == 'word_char2':
            l_word_representation = word_representation_layer4(
                [l_emb, l_char_emb, l_char_mask])
        elif params.nntype == 'word_char3':
            l_word_representation = word_representation_layer5(
                [l_emb, l_char_emb, l_char_mask])
        else:
            raise ValueError('something wrong in ppdb_char_word_model!')

        l_average = lasagne_average_layer([l_word_representation, l_mask])

        embg1 = lasagne.layers.get_output(
            l_average, {
                l_in: g1batchindices,
                l_mask: g1mask,
                l_char_in: char_g1batchindices,
                l_char_mask: char_g1mask
            })
        embg2 = lasagne.layers.get_output(
            l_average, {
                l_in: g2batchindices,
                l_mask: g2mask,
                l_char_in: char_g2batchindices,
                l_char_mask: char_g2mask
            })
        embp1 = lasagne.layers.get_output(
            l_average, {
                l_in: p1batchindices,
                l_mask: p1mask,
                l_char_in: char_p1batchindices,
                l_char_mask: char_p1mask
            })
        embp2 = lasagne.layers.get_output(
            l_average, {
                l_in: p2batchindices,
                l_mask: p2mask,
                l_char_in: char_p2batchindices,
                l_char_mask: char_p2mask
            })

        g1g2 = (embg1 * embg2).sum(axis=1)
        g1g2norm = T.sqrt(T.sum(embg1**2, axis=1)) * T.sqrt(
            T.sum(embg2**2, axis=1))
        g1g2 = g1g2 / g1g2norm

        p1g1 = (embp1 * embg1).sum(axis=1)
        p1g1norm = T.sqrt(T.sum(embp1**2, axis=1)) * T.sqrt(
            T.sum(embg1**2, axis=1))
        p1g1 = p1g1 / p1g1norm

        p2g2 = (embp2 * embg2).sum(axis=1)
        p2g2norm = T.sqrt(T.sum(embp2**2, axis=1)) * T.sqrt(
            T.sum(embg2**2, axis=1))
        p2g2 = p2g2 / p2g2norm

        costp1g1 = params.margin - g1g2 + p1g1
        costp1g1 = costp1g1 * (costp1g1 > 0)

        costp2g2 = params.margin - g1g2 + p2g2
        costp2g2 = costp2g2 * (costp2g2 > 0)

        cost = costp1g1 + costp2g2
        network_params = lasagne.layers.get_all_params(l_average,
                                                       trainable=True)
        network_params.pop(0)
        self.all_params = lasagne.layers.get_all_params(l_average,
                                                        trainable=True)

        #regularization
        l2 = 0.5 * params.LC * sum(
            lasagne.regularization.l2(x) for x in network_params)
        if params.updatewords:
            word_reg = 0.5 * params.LW * lasagne.regularization.l2(We -
                                                                   initial_We)
            char_reg = 0.5 * params.LWC * lasagne.regularization.l2(Wc -
                                                                    initial_Wc)
            cost = T.mean(cost) + l2 + word_reg + char_reg
        else:
            cost = T.mean(cost) + l2

        self.feedforward_function = theano.function(
            [g1batchindices, char_g1batchindices, g1mask, char_g1mask], embg1)
        self.cost_function = theano.function([
            g1batchindices, char_g1batchindices, g2batchindices,
            char_g2batchindices, p1batchindices, char_p1batchindices,
            p2batchindices, char_p2batchindices, g1mask, char_g1mask, g2mask,
            char_g2mask, p1mask, char_p1mask, p2mask, char_p2mask
        ], cost)

        prediction = g1g2

        self.scoring_function = theano.function([
            g1batchindices, char_g1batchindices, g2batchindices,
            char_g2batchindices, g1mask, char_g1mask, g2mask, char_g2mask
        ], prediction)

        self.train_function = None
        if params.updatewords:
            grads = theano.gradient.grad(cost, self.all_params)
            if params.clip:
                grads = [
                    lasagne.updates.norm_constraint(grad, params.clip,
                                                    range(grad.ndim))
                    for grad in grads
                ]
            updates = params.learner(grads, self.all_params, params.eta)
            self.train_function = theano.function([
                g1batchindices, char_g1batchindices, g2batchindices,
                char_g2batchindices, p1batchindices, char_p1batchindices,
                p2batchindices, char_p2batchindices, g1mask, char_g1mask,
                g2mask, char_g2mask, p1mask, char_p1mask, p2mask, char_p2mask
            ],
                                                  cost,
                                                  updates=updates)
        else:
            self.all_params = network_params
            grads = theano.gradient.grad(cost, self.all_params)
            if params.clip:
                grads = [
                    lasagne.updates.norm_constraint(grad, params.clip,
                                                    range(grad.ndim))
                    for grad in grads
                ]
            updates = params.learner(grads, self.all_params, params.eta)
            self.train_function = theano.function([
                g1batchindices, char_g1batchindices, g2batchindices,
                char_g2batchindices, p1batchindices, char_p1batchindices,
                p2batchindices, char_p2batchindices, g1mask, char_g1mask,
                g2mask, char_g2mask, p1mask, char_p1mask, p2mask, char_p2mask
            ],
                                                  cost,
                                                  updates=updates)
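# --- Illustration (not part of the original snippet) ---
# A sketch of the character-level inputs the word+char model above consumes:
# char_*batchindices is an int32 tensor of shape (batch, words, chars) and
# char_*mask a float32 tensor of the same shape marking real characters.
# The toy shapes and index values below are assumptions for demonstration only.
import numpy as np

batch, max_words, max_chars = 2, 3, 4
char_idxs = np.zeros((batch, max_words, max_chars), dtype='int32')
char_mask = np.zeros((batch, max_words, max_chars), dtype='float32')
char_idxs[0, 0, :3] = [5, 8, 2]     # characters of the first word of sentence 0
char_mask[0, 0, :3] = 1.0
# These pair with the word-level index/mask matrices when calling, e.g.,
# feedforward_function(g1batchindices, char_g1batchindices, g1mask, char_g1mask).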
    def __init__(self, We_initial, params):

        if params.maxval:
            self.nout = params.maxval - params.minval + 1

        p = None
        if params.traintype == "reg" or params.traintype == "rep":
            p = cPickle.load(open(params.regfile, 'rb'))
            print(p)  # contains We, W, and b

        if params.traintype == "reg":
            print("regularizing to parameters")
            raise NotImplementedError("Not implemented for DAN model.")

        if params.traintype == "rep":
            print("not updating embeddings")
            raise NotImplementedError("Not implemented for DAN model.")

        #params
        initial_We = theano.shared(np.asarray(We_initial, dtype = config.floatX))
        We = theano.shared(np.asarray(We_initial, dtype = config.floatX))

        if params.traintype == "reg":
            initial_We = theano.shared(np.asarray(p[0].get_value(), dtype = config.floatX))
            We = theano.shared(np.asarray(p[0].get_value(), dtype = config.floatX))
            updatewords = True

        if params.traintype == "rep":
            We = theano.shared(np.asarray(p[0].get_value(), dtype = config.floatX))
            updatewords = False

        #symbolic params
        g1batchindices = T.imatrix(); g2batchindices = T.imatrix()
        g1mask = T.matrix(); g2mask = T.matrix()
        scores = T.matrix()

        if params.traintype == "reg" or params.traintype == "rep":
            W = np.asarray(p[1].get_value(), dtype = config.floatX)
            b = np.asarray(p[2].get_value(), dtype = config.floatX)

        l_in = lasagne.layers.InputLayer((None, None))
        l_mask = lasagne.layers.InputLayer(shape=(None, None))
        l_emb = lasagne.layers.EmbeddingLayer(l_in, input_size=We.get_value().shape[0], output_size=We.get_value().shape[1], W=We)
        l_average = lasagne_average_layer([l_emb, l_mask])
        l_1 = lasagne.layers.DenseLayer(l_average, params.hiddensize, nonlinearity=params.nonlinearity)
        l_2 = lasagne.layers.DenseLayer(l_1, params.hiddensize, nonlinearity=params.nonlinearity)
        l_3 = lasagne.layers.DenseLayer(l_2, params.hiddensize, nonlinearity=params.nonlinearity)
        l_4 = lasagne.layers.DenseLayer(l_3, params.hiddensize, nonlinearity=params.nonlinearity)

        l_out = None
        if params.numlayers == 1:
            l_out = l_1
        elif params.numlayers == 2:
            l_out = l_2
        elif params.numlayers == 3:
            l_out = l_3
        elif params.numlayers == 4:
            l_out = l_4
        else:
            raise ValueError('Only 1-4 layers are supported currently.')

        embg1 = lasagne.layers.get_output(l_out, {l_in:g1batchindices, l_mask:g1mask})
        embg2 = lasagne.layers.get_output(l_out, {l_in:g2batchindices, l_mask:g2mask})

        g1_dot_g2 = embg1*embg2
        g1_abs_g2 = abs(embg1-embg2)

        lin_dot = lasagne.layers.InputLayer((None, params.hiddensize))
        lin_abs = lasagne.layers.InputLayer((None, params.hiddensize))
        l_sum = lasagne.layers.ConcatLayer([lin_dot, lin_abs])
        l_sigmoid = lasagne.layers.DenseLayer(l_sum, params.memsize, nonlinearity=lasagne.nonlinearities.sigmoid)

        if params.task == "sim":
            l_softmax = lasagne.layers.DenseLayer(l_sigmoid, self.nout, nonlinearity=T.nnet.softmax)
            X = lasagne.layers.get_output(l_softmax, {lin_dot:g1_dot_g2, lin_abs:g1_abs_g2})
            Y = T.log(X)

            cost = scores*(T.log(scores) - Y)
            cost = cost.sum(axis=1)/(float(self.nout))

            prediction = 0.
            i = params.minval
            while i<= params.maxval:
                prediction = prediction + i*X[:,i-1]
                i += 1
        elif params.task == "ent":
            l_softmax = lasagne.layers.DenseLayer(l_sigmoid, 3, nonlinearity=T.nnet.softmax)
            X = lasagne.layers.get_output(l_softmax, {lin_dot:g1_dot_g2, lin_abs:g1_abs_g2})

            cost = theano.tensor.nnet.categorical_crossentropy(X,scores)

            prediction = T.argmax(X, axis=1)
        else:
            raise ValueError('Params.task not set correctly.')

        self.network_params = lasagne.layers.get_all_params(l_out, trainable=True) + lasagne.layers.get_all_params(l_softmax, trainable=True)
        self.network_params.pop(0)
        self.all_params = lasagne.layers.get_all_params(l_out, trainable=True) + lasagne.layers.get_all_params(l_softmax, trainable=True)

        reg = self.getRegTerm(params, We, initial_We, l_out, l_softmax, p)
        self.trainable = self.getTrainableParams(params)
        cost = T.mean(cost) + reg

        self.feedforward_function = theano.function([g1batchindices,g1mask], embg1)
        self.scoring_function = theano.function([g1batchindices, g2batchindices,
                             g1mask, g2mask],prediction)
        self.cost_function = theano.function([scores, g1batchindices, g2batchindices,
                             g1mask, g2mask], cost)

        #updates
        grads = theano.gradient.grad(cost, self.trainable)
        if params.clip:
            grads = [lasagne.updates.norm_constraint(grad, params.clip, range(grad.ndim)) for grad in grads]
        updates = params.learner(grads, self.trainable, params.eta)
        self.train_function = theano.function([scores, g1batchindices, g2batchindices,
                             g1mask, g2mask], cost, updates=updates)
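# --- Illustration (not part of the original snippet) ---
# In the "sim" branch above, `scores` is a per-example probability distribution
# over the integer ratings minval..maxval; the cost is a KL-style divergence
# against the softmax output and the prediction is the expected rating
# sum_i i * X[:, i-1]. Below is one common way (an assumption, not taken from
# this file) to turn a real-valued gold score into such a target distribution.
import numpy as np

def score_to_distribution(y, minval=1, maxval=5):
    # Spread the gold score y over its two neighbouring integer ratings.
    p = np.zeros(maxval - minval + 1, dtype='float32')
    lower = int(np.floor(y))
    if lower >= maxval:
        p[maxval - minval] = 1.0
    else:
        p[lower - minval] = lower + 1 - y
        p[lower + 1 - minval] = y - lower
    return p

print(score_to_distribution(3.4))   # e.g. [0. 0. 0.6 0.4 0.] -> expected rating 3.4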
Example #16
0
    def __init__(self, We_initial, params):

        p = None

        if params.traintype == "reg" or params.traintype == "rep":
            p = cPickle.load(open(params.regfile, 'rb'))
            print(p)  # contains We

        if params.traintype == "reg":
            print("regularizing to parameters")

        if params.traintype == "rep":
            print("not updating embeddings")

        initial_We = theano.shared(np.asarray(We_initial, dtype = config.floatX))
        We = theano.shared(np.asarray(We_initial, dtype = config.floatX))

        if params.traintype == "reg":
            initial_We = theano.shared(np.asarray(p[0].get_value(), dtype = config.floatX))
            We = theano.shared(np.asarray(p[0].get_value(), dtype = config.floatX))

        if params.traintype == "rep":
            We = theano.shared(np.asarray(p[0].get_value(), dtype = config.floatX))

        g1batchindices = T.imatrix()
        g1mask = T.matrix()
        scores = T.matrix()

        l_in = lasagne.layers.InputLayer((None, None, 1))
        l_mask = lasagne.layers.InputLayer(shape=(None, None))
        l_emb = lasagne.layers.EmbeddingLayer(l_in, input_size=We.get_value().shape[0], output_size=We.get_value().shape[1], W=We)
        l_out = lasagne_average_layer([l_emb, l_mask])

        embg = lasagne.layers.get_output(l_out, {l_in:g1batchindices, l_mask:g1mask})

        l_in2 = lasagne.layers.InputLayer((None, We.get_value().shape[1]))
        l_sigmoid = lasagne.layers.DenseLayer(l_in2, params.memsize, nonlinearity=lasagne.nonlinearities.sigmoid)

        l_softmax = lasagne.layers.DenseLayer(l_sigmoid, 2, nonlinearity=T.nnet.softmax)
        X = lasagne.layers.get_output(l_softmax, {l_in2:embg})
        cost = T.nnet.categorical_crossentropy(X,scores)
        prediction = T.argmax(X, axis=1)

        self.network_params = lasagne.layers.get_all_params(l_out, trainable=True) + lasagne.layers.get_all_params(l_softmax, trainable=True)
        self.network_params.pop(0)
        self.all_params = lasagne.layers.get_all_params(l_out, trainable=True) + lasagne.layers.get_all_params(l_softmax, trainable=True)

        reg = self.getRegTerm(params, We, initial_We)
        self.trainable = self.getTrainableParams(params)
        cost = T.mean(cost) + reg

        self.feedforward_function = theano.function([g1batchindices,g1mask], embg)
        self.scoring_function = theano.function([g1batchindices,
                             g1mask],prediction)
        self.cost_function = theano.function([scores, g1batchindices,
                             g1mask], cost)

        grads = theano.gradient.grad(cost, self.trainable)
        if params.clip:
            grads = [lasagne.updates.norm_constraint(grad, params.clip, range(grad.ndim)) for grad in grads]
        updates = params.learner(grads, self.trainable, params.eta)
        self.train_function = theano.function([scores, g1batchindices,
                             g1mask], cost, updates=updates)
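# --- Illustration (not part of the original snippet) ---
# Here `scores` is declared as a matrix, so the 2-way categorical cross-entropy
# expects one-hot target rows rather than integer labels. A toy batch with
# assumed labels, for demonstration only:
import numpy as np

labels = np.array([0, 1, 1], dtype='int32')
scores = np.zeros((len(labels), 2), dtype='float32')
scores[np.arange(len(labels)), labels] = 1.0
# `scores` can then be passed as the first argument of cost_function / train_function.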
Example #17
0
    def __init__(self, We_initial, params):

        p = None

        if params.traintype == "reg" or params.traintype == "rep":
            p = cPickle.load(open(params.regfile, 'rb'))
            print(p)  # contains We, W, and b

        if params.traintype == "reg":
            print("regularizing to parameters")
            raise NotImplementedError("Not implemented for DAN model.")

        if params.traintype == "rep":
            print("not updating embeddings")
            raise NotImplementedError("Not implemented for DAN model.")

        initial_We = theano.shared(np.asarray(We_initial, dtype = config.floatX))
        We = theano.shared(np.asarray(We_initial, dtype = config.floatX))

        if params.traintype == "reg":
            initial_We = theano.shared(np.asarray(p[0].get_value(), dtype = config.floatX))
            We = theano.shared(np.asarray(p[0].get_value(), dtype = config.floatX))
            updatewords = True

        if params.traintype == "rep":
            We = theano.shared(np.asarray(p[0].get_value(), dtype = config.floatX))
            updatewords = False

        g1batchindices = T.imatrix()
        g1mask = T.matrix()
        scores = T.matrix()

        if params.traintype == "reg" or params.traintype == "rep":
            W = np.asarray(p[1].get_value(), dtype = config.floatX)
            b = np.asarray(p[2].get_value(), dtype = config.floatX)

        l_in = lasagne.layers.InputLayer((None, None))
        l_mask = lasagne.layers.InputLayer(shape=(None, None))
        l_emb = lasagne.layers.EmbeddingLayer(l_in, input_size=We.get_value().shape[0], output_size=We.get_value().shape[1], W=We)
        l_average = lasagne_average_layer([l_emb, l_mask])
        l_1 = lasagne.layers.DenseLayer(l_average, params.hiddensize, nonlinearity=params.nonlinearity)
        l_2 = lasagne.layers.DenseLayer(l_1, params.hiddensize, nonlinearity=params.nonlinearity)
        l_3 = lasagne.layers.DenseLayer(l_2, params.hiddensize, nonlinearity=params.nonlinearity)
        l_4 = lasagne.layers.DenseLayer(l_3, params.hiddensize, nonlinearity=params.nonlinearity)

        l_out = None
        if params.numlayers == 1:
            l_out = l_1
        elif params.numlayers == 2:
            l_out = l_2
        elif params.numlayers == 3:
            l_out = l_3
        elif params.numlayers == 4:
            l_out = l_4
        else:
            raise ValueError('Only 1-4 layers are supported currently.')

        embg = lasagne.layers.get_output(l_out, {l_in:g1batchindices, l_mask:g1mask})

        l_in2 = lasagne.layers.InputLayer((None, params.hiddensize))
        l_sigmoid = lasagne.layers.DenseLayer(l_in2, params.memsize, nonlinearity=lasagne.nonlinearities.sigmoid)

        l_softmax = lasagne.layers.DenseLayer(l_sigmoid, 2, nonlinearity=T.nnet.softmax)
        X = lasagne.layers.get_output(l_softmax, {l_in2:embg})
        cost = T.nnet.categorical_crossentropy(X,scores)
        prediction = T.argmax(X, axis=1)

        self.network_params = lasagne.layers.get_all_params(l_out, trainable=True) + lasagne.layers.get_all_params(l_softmax, trainable=True)
        self.network_params.pop(0)
        self.all_params = lasagne.layers.get_all_params(l_out, trainable=True) + lasagne.layers.get_all_params(l_softmax, trainable=True)

        reg = self.getRegTerm(params, We, initial_We, l_out, l_softmax, p)
        self.trainable = self.getTrainableParams(params)
        cost = T.mean(cost) + reg

        self.feedforward_function = theano.function([g1batchindices,g1mask], embg)
        self.scoring_function = theano.function([g1batchindices,
                             g1mask],prediction)
        self.cost_function = theano.function([scores, g1batchindices,
                             g1mask], cost)

        grads = theano.gradient.grad(cost, self.trainable)
        if params.clip:
            grads = [lasagne.updates.norm_constraint(grad, params.clip, range(grad.ndim)) for grad in grads]
        updates = params.learner(grads, self.trainable, params.eta)
        self.train_function = theano.function([scores, g1batchindices,
                             g1mask], cost, updates=updates)
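# --- Illustration (not part of the original snippet) ---
# A minimal sketch of the `params` object this last constructor reads from,
# listing only the attributes referenced in the snippet above (getRegTerm /
# getTrainableParams may read more). Values are assumptions for demonstration;
# params.learner is expected to behave like a lasagne update rule with the
# signature (grads, params, learning_rate).
import argparse
import lasagne

params = argparse.Namespace(
    traintype='normal',          # anything other than "reg"/"rep" skips the pickle branch
    regfile=None,
    numlayers=2,
    hiddensize=300,
    nonlinearity=lasagne.nonlinearities.tanh,
    memsize=150,
    clip=1.0,
    eta=0.05,
    learner=lasagne.updates.adam,
)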