Example 1
class jPosDepLearner:
    def __init__(self, vocab, pos, rels, w2i, c2i, options):
        self.model = ParameterCollection()
        random.seed(1)
        self.trainer = AdamTrainer(self.model)
        #self.trainer = SimpleSGDTrainer(self.model)
        self.activations = {
            'tanh': tanh,
            'sigmoid': logistic,
            'relu': rectify,
            'tanh3': (lambda x: tanh(cwise_multiply(cwise_multiply(x, x), x)))
        }
        self.activation = self.activations[options.activation]

        self.blstmFlag = options.blstmFlag
        self.labelsFlag = options.labelsFlag
        self.costaugFlag = options.costaugFlag
        self.bibiFlag = options.bibiFlag

        self.ldims = options.lstm_dims
        self.wdims = options.wembedding_dims
        self.cdims = options.cembedding_dims
        self.layers = options.lstm_layers
        self.wordsCount = vocab
        self.vocab = {word: ind + 3 for word, ind in w2i.iteritems()}
        self.pos = {word: ind for ind, word in enumerate(pos)}
        self.id2pos = {ind: word for ind, word in enumerate(pos)}
        self.c2i = c2i
        self.rels = {word: ind for ind, word in enumerate(rels)}
        self.irels = rels

        self.external_embedding, self.edim = None, 0
        if options.external_embedding is not None:
            external_embedding_fp = open(options.external_embedding, 'r')
            external_embedding_fp.readline()
            self.external_embedding = {
                line.split(' ')[0]:
                [float(f) for f in line.strip().split(' ')[1:]]
                for line in external_embedding_fp
            }
            external_embedding_fp.close()

            self.edim = len(self.external_embedding.values()[0])
            self.noextrn = [0.0 for _ in xrange(self.edim)]
            self.extrnd = {
                word: i + 3
                for i, word in enumerate(self.external_embedding)
            }
            self.elookup = self.model.add_lookup_parameters(
                (len(self.external_embedding) + 3, self.edim))
            for word, i in self.extrnd.iteritems():
                self.elookup.init_row(i, self.external_embedding[word])
            self.extrnd['*PAD*'] = 1
            self.extrnd['*INITIAL*'] = 2

            print 'Loaded external embeddings. Vector dimension:', self.edim

        if self.bibiFlag:
            self.builders = [
                VanillaLSTMBuilder(1, self.wdims + self.edim + self.cdims * 2,
                                   self.ldims, self.model),
                VanillaLSTMBuilder(1, self.wdims + self.edim + self.cdims * 2,
                                   self.ldims, self.model)
            ]
            self.bbuilders = [
                VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model),
                VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model)
            ]
        elif self.layers > 0:
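            # Note: this branch sizes the LSTM input as wdims + edim only,
            # while entry.vec (built in Train/Predict) always appends the two
            # char-RNN states (cdims * 2), so with cdims > 0 this would hit a
            # dimension mismatch at run time.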
            self.builders = [
                VanillaLSTMBuilder(self.layers, self.wdims + self.edim,
                                   self.ldims, self.model),
                VanillaLSTMBuilder(self.layers, self.wdims + self.edim,
                                   self.ldims, self.model)
            ]
        else:
            self.builders = [
                SimpleRNNBuilder(1, self.wdims + self.edim + self.cdims * 2,
                                 self.ldims, self.model),
                SimpleRNNBuilder(1, self.wdims + self.edim + self.cdims * 2,
                                 self.ldims, self.model)
            ]

        self.ffSeqPredictor = FFSequencePredictor(
            Layer(self.model, self.ldims * 2, len(self.pos), softmax))

        self.hidden_units = options.hidden_units
        self.hidden2_units = options.hidden2_units

        self.vocab['*PAD*'] = 1
        self.vocab['*INITIAL*'] = 2

        self.wlookup = self.model.add_lookup_parameters(
            (len(vocab) + 3, self.wdims))
        self.clookup = self.model.add_lookup_parameters((len(c2i), self.cdims))

        self.hidLayerFOH = self.model.add_parameters(
            (self.hidden_units, self.ldims * 2))
        self.hidLayerFOM = self.model.add_parameters(
            (self.hidden_units, self.ldims * 2))
        self.hidBias = self.model.add_parameters((self.hidden_units))

        self.hid2Layer = self.model.add_parameters(
            (self.hidden2_units, self.hidden_units))
        self.hid2Bias = self.model.add_parameters((self.hidden2_units))

        self.outLayer = self.model.add_parameters(
            (1, self.hidden2_units
             if self.hidden2_units > 0 else self.hidden_units))

        if self.labelsFlag:
            self.rhidLayerFOH = self.model.add_parameters(
                (self.hidden_units, 2 * self.ldims))
            self.rhidLayerFOM = self.model.add_parameters(
                (self.hidden_units, 2 * self.ldims))
            self.rhidBias = self.model.add_parameters((self.hidden_units))

            self.rhid2Layer = self.model.add_parameters(
                (self.hidden2_units, self.hidden_units))
            self.rhid2Bias = self.model.add_parameters((self.hidden2_units))

            self.routLayer = self.model.add_parameters(
                (len(self.irels), self.hidden2_units
                 if self.hidden2_units > 0 else self.hidden_units))
            self.routBias = self.model.add_parameters((len(self.irels)))

        self.char_rnn = RNNSequencePredictor(
            LSTMBuilder(1, self.cdims, self.cdims, self.model))

    def __getExpr(self, sentence, i, j, train):

        if sentence[i].headfov is None:
            sentence[i].headfov = self.hidLayerFOH.expr() * concatenate(
                [sentence[i].lstms[0], sentence[i].lstms[1]])
        if sentence[j].modfov is None:
            sentence[j].modfov = self.hidLayerFOM.expr() * concatenate(
                [sentence[j].lstms[0], sentence[j].lstms[1]])

        if self.hidden2_units > 0:
            output = self.outLayer.expr() * self.activation(
                self.hid2Bias.expr() + self.hid2Layer.expr() *
                self.activation(sentence[i].headfov + sentence[j].modfov +
                                self.hidBias.expr()))  # + self.outBias
        else:
            output = self.outLayer.expr() * self.activation(
                sentence[i].headfov + sentence[j].modfov +
                self.hidBias.expr())  # + self.outBias

        return output

    def __evaluate(self, sentence, train):
        exprs = [[
            self.__getExpr(sentence, i, j, train)
            for j in xrange(len(sentence))
        ] for i in xrange(len(sentence))]
        scores = np.array([[output.scalar_value() for output in exprsRow]
                           for exprsRow in exprs])

        return scores, exprs

    def pick_neg_log(self, pred, gold):
        return -dynet.log(dynet.pick(pred, gold))
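
    # A minimal sketch of what pick_neg_log computes (toy numbers; assumes the
    # usual "from dynet import *" setup this file relies on): the negative
    # log-probability that a softmax distribution assigns to the gold index.
    #
    #   pred = softmax(inputVector([2.0, 1.0, 0.1]))
    #   loss = -dynet.log(dynet.pick(pred, 0))  # ~0.417 = -log P(class 0)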

    def __evaluateLabel(self, sentence, i, j):
        if sentence[i].rheadfov is None:
            sentence[i].rheadfov = self.rhidLayerFOH.expr() * concatenate(
                [sentence[i].lstms[0], sentence[i].lstms[1]])
        if sentence[j].rmodfov is None:
            sentence[j].rmodfov = self.rhidLayerFOM.expr() * concatenate(
                [sentence[j].lstms[0], sentence[j].lstms[1]])

        if self.hidden2_units > 0:
            output = self.routLayer.expr() * self.activation(
                self.rhid2Bias.expr() + self.rhid2Layer.expr() *
                self.activation(sentence[i].rheadfov + sentence[j].rmodfov +
                                self.rhidBias.expr())) + self.routBias.expr()
        else:
            output = self.routLayer.expr() * self.activation(
                sentence[i].rheadfov + sentence[j].rmodfov +
                self.rhidBias.expr()) + self.routBias.expr()

        return output.value(), output

    def Save(self, filename):
        self.model.save(filename)

    def Load(self, filename):
        self.model.populate(filename)

    def Predict(self, conll_path):
        with open(conll_path, 'r') as conllFP:
            for iSentence, sentence in enumerate(read_conll(conllFP,
                                                            self.c2i)):
                conll_sentence = [
                    entry for entry in sentence
                    if isinstance(entry, utils.ConllEntry)
                ]

                for entry in conll_sentence:
                    wordvec = self.wlookup[int(self.vocab.get(
                        entry.norm, 0))] if self.wdims > 0 else None
                    evec = self.elookup[int(
                        self.extrnd.get(entry.form,
                                        self.extrnd.get(entry.norm, 0))
                    )] if self.external_embedding is not None else None

                    last_state = self.char_rnn.predict_sequence(
                        [self.clookup[c] for c in entry.idChars])[-1]
                    rev_last_state = self.char_rnn.predict_sequence(
                        [self.clookup[c] for c in reversed(entry.idChars)])[-1]

                    entry.vec = concatenate(
                        filter(None,
                               [wordvec, evec, last_state, rev_last_state]))

                    entry.lstms = [entry.vec, entry.vec]
                    entry.headfov = None
                    entry.modfov = None

                    entry.rheadfov = None
                    entry.rmodfov = None

                if self.blstmFlag:
                    lstm_forward = self.builders[0].initial_state()
                    lstm_backward = self.builders[1].initial_state()

                    for entry, rentry in zip(conll_sentence,
                                             reversed(conll_sentence)):
                        lstm_forward = lstm_forward.add_input(entry.vec)
                        lstm_backward = lstm_backward.add_input(rentry.vec)

                        entry.lstms[1] = lstm_forward.output()
                        rentry.lstms[0] = lstm_backward.output()

                    if self.bibiFlag:
                        for entry in conll_sentence:
                            entry.vec = concatenate(entry.lstms)

                        blstm_forward = self.bbuilders[0].initial_state()
                        blstm_backward = self.bbuilders[1].initial_state()

                        for entry, rentry in zip(conll_sentence,
                                                 reversed(conll_sentence)):
                            blstm_forward = blstm_forward.add_input(entry.vec)
                            blstm_backward = blstm_backward.add_input(
                                rentry.vec)

                            entry.lstms[1] = blstm_forward.output()
                            rentry.lstms[0] = blstm_backward.output()

                scores, exprs = self.__evaluate(conll_sentence, True)
                heads = decoder.parse_proj(scores)

                # Multiple roots: attach every root found after the first to
                # the previous root word
                rootCount = 0
                rootWid = -1
                for index, head in enumerate(heads):
                    if head == 0:
                        rootCount += 1
                        if rootCount == 1:
                            rootWid = index
                        if rootCount > 1:
                            heads[index] = rootWid
                            rootWid = index

                concat_layer = [
                    concatenate(entry.lstms) for entry in conll_sentence
                ]
                outputFFlayer = self.ffSeqPredictor.predict_sequence(
                    concat_layer)
                predicted_pos_indices = [
                    np.argmax(o.value()) for o in outputFFlayer
                ]
                predicted_postags = [
                    self.id2pos[idx] for idx in predicted_pos_indices
                ]

                for entry, head, pos in zip(conll_sentence, heads,
                                            predicted_postags):
                    entry.pred_parent_id = head
                    entry.pred_relation = '_'
                    entry.pred_pos = pos

                dump = False

                if self.labelsFlag:
                    for modifier, head in enumerate(heads[1:]):
                        scores, exprs = self.__evaluateLabel(
                            conll_sentence, head, modifier + 1)
                        conll_sentence[modifier +
                                       1].pred_relation = self.irels[max(
                                           enumerate(scores),
                                           key=itemgetter(1))[0]]

                renew_cg()
                if not dump:
                    yield sentence
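
    # Usage sketch (hypothetical paths; Predict is a generator that parses one
    # CoNLL sentence at a time and yields it with the pred_* fields filled):
    #
    #   parser = jPosDepLearner(words, pos, rels, w2i, c2i, options)
    #   parser.Load("trained.model")
    #   for sent in parser.Predict("test.conllu"):
    #       for entry in sent[1:]:  # skip the artificial root entry
    #           print str(entry)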

    def Train(self, conll_path):
        errors = 0
        batch = 0
        eloss = 0.0
        mloss = 0.0
        eerrors = 0
        etotal = 0
        start = time.time()

        with open(conll_path, 'r') as conllFP:
            shuffledData = list(read_conll(conllFP, self.c2i))
            random.shuffle(shuffledData)

            errs = []
            lerrs = []
            posErrs = []
            eeloss = 0.0

            for iSentence, sentence in enumerate(shuffledData):
                if iSentence % 500 == 0 and iSentence != 0:
                    print "Processing sentence number: %d" % iSentence, ", Loss: %.2f" % (
                        eloss / etotal), ", Time: %.2f" % (time.time() - start)
                    start = time.time()
                    eerrors = 0
                    eloss = 0.0
                    etotal = 0
                    lerrors = 0
                    ltotal = 0

                conll_sentence = [
                    entry for entry in sentence
                    if isinstance(entry, utils.ConllEntry)
                ]

                for entry in conll_sentence:
                    # frequency-based word dropout: keep the word form with
                    # probability count / (count + 0.25)
                    c = float(self.wordsCount.get(entry.norm, 0))
                    dropFlag = (random.random() < (c / (0.25 + c)))
                    wordvec = self.wlookup[
                        int(self.vocab.get(entry.norm, 0)
                            ) if dropFlag else 0] if self.wdims > 0 else None
                    evec = None

                    if self.external_embedding is not None:
                        evec = self.elookup[self.extrnd.get(
                            entry.form, self.extrnd.get(entry.norm, 0)) if
                                            (dropFlag or
                                             (random.random() < 0.5)) else 0]
                    #entry.vec = concatenate(filter(None, [wordvec, evec]))

                    last_state = self.char_rnn.predict_sequence(
                        [self.clookup[char] for char in entry.idChars])[-1]
                    rev_last_state = self.char_rnn.predict_sequence(
                        [self.clookup[char]
                         for char in reversed(entry.idChars)])[-1]

                    entry.vec = concatenate([
                        dynet.noise(fe, 0.2) for fe in filter(
                            None, [wordvec, evec, last_state, rev_last_state])
                    ])

                    entry.lstms = [entry.vec, entry.vec]
                    entry.headfov = None
                    entry.modfov = None

                    entry.rheadfov = None
                    entry.rmodfov = None

                if self.blstmFlag:
                    lstm_forward = self.builders[0].initial_state()
                    lstm_backward = self.builders[1].initial_state()

                    for entry, rentry in zip(conll_sentence,
                                             reversed(conll_sentence)):
                        lstm_forward = lstm_forward.add_input(entry.vec)
                        lstm_backward = lstm_backward.add_input(rentry.vec)

                        entry.lstms[1] = lstm_forward.output()
                        rentry.lstms[0] = lstm_backward.output()

                    if self.bibiFlag:
                        for entry in conll_sentence:
                            entry.vec = concatenate(entry.lstms)

                        blstm_forward = self.bbuilders[0].initial_state()
                        blstm_backward = self.bbuilders[1].initial_state()

                        for entry, rentry in zip(conll_sentence,
                                                 reversed(conll_sentence)):
                            blstm_forward = blstm_forward.add_input(entry.vec)
                            blstm_backward = blstm_backward.add_input(
                                rentry.vec)

                            entry.lstms[1] = blstm_forward.output()
                            rentry.lstms[0] = blstm_backward.output()

                scores, exprs = self.__evaluate(conll_sentence, True)
                gold = [entry.parent_id for entry in conll_sentence]
                heads = decoder.parse_proj(scores,
                                           gold if self.costaugFlag else None)

                if self.labelsFlag:
                    for modifier, head in enumerate(gold[1:]):
                        rscores, rexprs = self.__evaluateLabel(
                            conll_sentence, head, modifier + 1)
                        goldLabelInd = self.rels[conll_sentence[modifier +
                                                                1].relation]
                        wrongLabelInd = max(((l, scr)
                                             for l, scr in enumerate(rscores)
                                             if l != goldLabelInd),
                                            key=itemgetter(1))[0]
                        if rscores[goldLabelInd] < rscores[wrongLabelInd] + 1:
                            lerrs.append(rexprs[wrongLabelInd] -
                                         rexprs[goldLabelInd])

                e = sum([1 for h, g in zip(heads[1:], gold[1:]) if h != g])
                eerrors += e
                if e > 0:
                    loss = [(exprs[h][i] - exprs[g][i])
                            for i, (h, g) in enumerate(zip(heads, gold))
                            if h != g]  # * (1.0/float(e))
                    eloss += (e)
                    mloss += (e)
                    errs.extend(loss)

                etotal += len(conll_sentence)

                concat_layer = [
                    concatenate(entry.lstms) for entry in conll_sentence
                ]
                concat_layer = [dynet.noise(fe, 0.2) for fe in concat_layer]
                outputFFlayer = self.ffSeqPredictor.predict_sequence(
                    concat_layer)
                posIDs = [self.pos.get(entry.pos) for entry in conll_sentence]
                for pred, gold in zip(outputFFlayer, posIDs):
                    posErrs.append(self.pick_neg_log(pred, gold))

                # iSentence % 1 == 0 is always true: with batch size 1 the
                # model updates after every sentence
                if iSentence % 1 == 0 or len(errs) > 0 or len(
                        lerrs) > 0 or len(posErrs) > 0:
                    eeloss = 0.0

                    if len(errs) > 0 or len(lerrs) > 0 or len(posErrs) > 0:
                        eerrs = (esum(errs + lerrs + posErrs)
                                 )  #* (1.0/(float(len(errs))))
                        eerrs.scalar_value()  # force the forward pass
                        eerrs.backward()
                        self.trainer.update()
                        errs = []
                        lerrs = []
                        posErrs = []

                    renew_cg()

        if len(errs) > 0:
            eerrs = (esum(errs + lerrs + posErrs))  #* (1.0/(float(len(errs))))
            eerrs.scalar_value()
            eerrs.backward()
            self.trainer.update()

            errs = []
            lerrs = []
            posErrs = []
            eeloss = 0.0

            renew_cg()

        self.trainer.update()
        print "Loss: %.2f" % (mloss / iSentence)
Example 2
class jPosDepLearner:
    def __init__(self, vocab, pos, rels, w2i, c2i, options):
        self.model = ParameterCollection()
        random.seed(1)
        self.trainer = AdamTrainer(self.model)
        #if options.learning_rate is not None:
        #    self.trainer = AdamTrainer(self.model, alpha=options.learning_rate)
        #    print("Adam initial learning rate:", options.learning_rate)
        self.activations = {'tanh': tanh, 'sigmoid': logistic, 'relu': rectify,
                            'tanh3': (lambda x: tanh(cwise_multiply(cwise_multiply(x, x), x)))}
        self.activation = self.activations[options.activation]

        self.blstmFlag = options.blstmFlag
        self.labelsFlag = options.labelsFlag
        self.costaugFlag = options.costaugFlag
        self.bibiFlag = options.bibiFlag

        self.ldims = options.lstm_dims
        self.wdims = options.wembedding_dims
        self.cdims = options.cembedding_dims
        self.layers = options.lstm_layers
        self.wordsCount = vocab
        self.vocab = {word: ind + 3 for word, ind in w2i.iteritems()}
        self.pos = {word: ind for ind, word in enumerate(pos)}
        self.id2pos = {ind: word for ind, word in enumerate(pos)}
        self.c2i = c2i
        self.rels = {word: ind for ind, word in enumerate(rels)}
        self.irels = rels
        self.pdims = options.pembedding_dims

        self.vocab['*PAD*'] = 1
        self.vocab['*INITIAL*'] = 2
        self.wlookup = self.model.add_lookup_parameters((len(vocab) + 3, self.wdims))
        self.clookup = self.model.add_lookup_parameters((len(c2i), self.cdims))
        self.plookup = self.model.add_lookup_parameters((len(pos), self.pdims))

        if options.external_embedding is not None:
            ext_embeddings, ext_emb_dim = load_embeddings_file(options.external_embedding, lower=True)
            assert (ext_emb_dim == self.wdims)
            print("Initializing word embeddings by pre-trained vectors")
            count = 0
            for word in self.vocab:
                _word = unicode(word, "utf-8")
                if _word in ext_embeddings:
                    count += 1
                    self.wlookup.init_row(self.vocab[word], ext_embeddings[_word])
            print("Vocab size: %d; #words having pretrained vectors: %d" % (len(self.vocab), count))

        self.pos_builders = [VanillaLSTMBuilder(1, self.wdims + self.cdims * 2, self.ldims, self.model),
                             VanillaLSTMBuilder(1, self.wdims + self.cdims * 2, self.ldims, self.model)]
        self.pos_bbuilders = [VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model),
                              VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model)]

        if self.bibiFlag:
            self.builders = [VanillaLSTMBuilder(1, self.wdims + self.cdims * 2 + self.pdims, self.ldims, self.model),
                             VanillaLSTMBuilder(1, self.wdims + self.cdims * 2 + self.pdims, self.ldims, self.model)]
            self.bbuilders = [VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model),
                              VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model)]
        elif self.layers > 0:
            self.builders = [VanillaLSTMBuilder(self.layers, self.wdims + self.cdims * 2 + self.pdims, self.ldims, self.model),
                             VanillaLSTMBuilder(self.layers, self.wdims + self.cdims * 2 + self.pdims, self.ldims, self.model)]
        else:
            self.builders = [SimpleRNNBuilder(1, self.wdims + self.cdims * 2, self.ldims, self.model),
                             SimpleRNNBuilder(1, self.wdims + self.cdims * 2, self.ldims, self.model)]

        self.ffSeqPredictor = FFSequencePredictor(Layer(self.model, self.ldims * 2, len(self.pos), softmax))

        self.hidden_units = options.hidden_units
        
        self.hidBias = self.model.add_parameters((self.ldims * 8))
        self.hidLayer = self.model.add_parameters((self.hidden_units, self.ldims * 8))
        self.hid2Bias = self.model.add_parameters((self.hidden_units))

        self.outLayer = self.model.add_parameters((1, self.hidden_units if self.hidden_units > 0 else self.ldims * 8))

        if self.labelsFlag:
            self.rhidBias = self.model.add_parameters((self.ldims * 8))
            self.rhidLayer = self.model.add_parameters((self.hidden_units, self.ldims * 8))
            self.rhid2Bias = self.model.add_parameters((self.hidden_units))
            self.routLayer = self.model.add_parameters(
                (len(self.irels), self.hidden_units if self.hidden_units > 0 else self.ldims * 8))
            self.routBias = self.model.add_parameters((len(self.irels)))
            self.ffRelPredictor = FFSequencePredictor(
                Layer(self.model, self.hidden_units if self.hidden_units > 0 else self.ldims * 8, len(self.irels),
                      softmax))

        self.char_rnn = RNNSequencePredictor(LSTMBuilder(1, self.cdims, self.cdims, self.model))

    def __getExpr(self, sentence, i, j):

        if sentence[i].headfov is None:
            sentence[i].headfov = concatenate([sentence[i].lstms[0], sentence[i].lstms[1]])
        if sentence[j].modfov is None:
            sentence[j].modfov = concatenate([sentence[j].lstms[0], sentence[j].lstms[1]])

        _inputVector = concatenate(
            [sentence[i].headfov, sentence[j].modfov, dynet.abs(sentence[i].headfov - sentence[j].modfov),
             dynet.cmult(sentence[i].headfov, sentence[j].modfov)])

        if self.hidden_units > 0:
            output = self.outLayer.expr() * self.activation(
                self.hid2Bias.expr() + self.hidLayer.expr() * self.activation(
                    _inputVector + self.hidBias.expr()))
        else:
            output = self.outLayer.expr() * self.activation(_inputVector + self.hidBias.expr())

        return output

    def __evaluate(self, sentence):
        exprs = [[self.__getExpr(sentence, i, j) for j in xrange(len(sentence))] for i in xrange(len(sentence))]
        scores = np.array([[output.scalar_value() for output in exprsRow] for exprsRow in exprs])

        return scores, exprs

    def pick_neg_log(self, pred, gold):
        return -dynet.log(dynet.pick(pred, gold))

    def __getRelVector(self, sentence, i, j):
        if sentence[i].rheadfov is None:
            sentence[i].rheadfov = concatenate([sentence[i].lstms[0], sentence[i].lstms[1]])
        if sentence[j].rmodfov is None:
            sentence[j].rmodfov = concatenate([sentence[j].lstms[0], sentence[j].lstms[1]])
        _outputVector = concatenate(
            [sentence[i].rheadfov, sentence[j].rmodfov, abs(sentence[i].rheadfov - sentence[j].rmodfov),
             cmult(sentence[i].rheadfov, sentence[j].rmodfov)])

        if self.hidden_units > 0:
            return self.rhid2Bias.expr() + self.rhidLayer.expr() * self.activation(
                _outputVector + self.rhidBias.expr())
        else:
            return _outputVector
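
    # The vector built above follows a common pair-feature recipe: for head h
    # and modifier m (each a concatenation of forward and backward LSTM
    # states, 2 * ldims each), it stacks [h; m; |h - m|; h * m], giving the
    # ldims * 8 input size used by the r* parameters. Toy sketch:
    #
    #   h = dynet.inputVector([1.0, 2.0]); m = dynet.inputVector([3.0, 1.0])
    #   v = concatenate([h, m, dynet.abs(h - m), dynet.cmult(h, m)])
    #   # v.value() -> [1.0, 2.0, 3.0, 1.0, 2.0, 1.0, 3.0, 2.0]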

    def Save(self, filename):
        self.model.save(filename)

    def Load(self, filename):
        self.model.populate(filename)

    def PredictFromText(self, text_file, output_file):
        #pdb.set_trace()
        #with open(output_file,'w',buffering=20*(1024**2)) as ofp:
        #    with open(text_file, 'r',buffering=20*(1024**2)) as t_fp:
        with open(output_file,'w') as ofp:
            with open(text_file, 'r') as t_fp:
                try:
                    for line in t_fp:
                        itf = tempfile.NamedTemporaryFile(delete=False)
                        otf = tempfile.NamedTemporaryFile(delete=False)
                        # write the raw line to the input temp file and close
                        # both handles before passing the paths on
                        itf.write(line)
                        itf.close()
                        otf.close()
                        conllConvertToFile(itf.name,otf.name)
                        conll_gen = self.Predict(otf.name)
                        for sentence in conll_gen:
                            for entry in sentence[1:]:
                                fields = str(entry).split('\t')
                                if (len(fields) > 1):
                                    ofp.write(fields[0] + '\t' + fields[1] + '\t' + fields[3] + '\t' + fields[6] + '\t' + fields[7]  + '\n')
                                else:
                                    ofp.write(str(entry) + '\n')
                            ofp.write('\n')
                        os.remove(itf.name)
                        os.remove(otf.name)
                except RuntimeError:
                    print("Unexpected error:", sys.exc_info()[0])
                    traceback.print_exc(file=sys.stdout)
                    # clear the exception state rather than silently passing
                    sys.exc_clear()
                

    def Predict(self, conll_path):
        with open(conll_path, 'r') as conllFP:
            for iSentence, sentence in enumerate(read_conll_predict(conllFP, self.c2i, self.wordsCount)):
                conll_sentence = [entry for entry in sentence if isinstance(entry, utils.ConllEntry)]

                for entry in conll_sentence:
                    wordvec = self.wlookup[int(self.vocab.get(entry.norm, 0))] if self.wdims > 0 else None

                    last_state = self.char_rnn.predict_sequence([self.clookup[c] for c in entry.idChars])[-1]
                    rev_last_state = self.char_rnn.predict_sequence([self.clookup[c] for c in reversed(entry.idChars)])[
                        -1]

                    entry.vec = concatenate(filter(None, [wordvec, last_state, rev_last_state]))

                    entry.pos_lstms = [entry.vec, entry.vec]
                    entry.headfov = None
                    entry.modfov = None

                    entry.rheadfov = None
                    entry.rmodfov = None

                #Predicted pos tags
                lstm_forward = self.pos_builders[0].initial_state()
                lstm_backward = self.pos_builders[1].initial_state()
                for entry, rentry in zip(conll_sentence, reversed(conll_sentence)):
                    lstm_forward = lstm_forward.add_input(entry.vec)
                    lstm_backward = lstm_backward.add_input(rentry.vec)

                    entry.pos_lstms[1] = lstm_forward.output()
                    rentry.pos_lstms[0] = lstm_backward.output()

                for entry in conll_sentence:
                    entry.pos_vec = concatenate(entry.pos_lstms)

                blstm_forward = self.pos_bbuilders[0].initial_state()
                blstm_backward = self.pos_bbuilders[1].initial_state()

                for entry, rentry in zip(conll_sentence, reversed(conll_sentence)):
                    blstm_forward = blstm_forward.add_input(entry.pos_vec)
                    blstm_backward = blstm_backward.add_input(rentry.pos_vec)
                    entry.pos_lstms[1] = blstm_forward.output()
                    rentry.pos_lstms[0] = blstm_backward.output()

                concat_layer = [concatenate(entry.pos_lstms) for entry in conll_sentence]
                outputFFlayer = self.ffSeqPredictor.predict_sequence(concat_layer)
                predicted_pos_indices = [np.argmax(o.value()) for o in outputFFlayer]
                predicted_postags = [self.id2pos[idx] for idx in predicted_pos_indices]

                # Add predicted pos tags for parsing prediction
                for entry, posid in zip(conll_sentence, predicted_pos_indices):
                    entry.vec = concatenate([entry.vec, self.plookup[posid]])
                    entry.lstms = [entry.vec, entry.vec]

                if self.blstmFlag:
                    lstm_forward = self.builders[0].initial_state()
                    lstm_backward = self.builders[1].initial_state()

                    for entry, rentry in zip(conll_sentence, reversed(conll_sentence)):
                        lstm_forward = lstm_forward.add_input(entry.vec)
                        lstm_backward = lstm_backward.add_input(rentry.vec)

                        entry.lstms[1] = lstm_forward.output()
                        rentry.lstms[0] = lstm_backward.output()

                    if self.bibiFlag:
                        for entry in conll_sentence:
                            entry.vec = concatenate(entry.lstms)

                        blstm_forward = self.bbuilders[0].initial_state()
                        blstm_backward = self.bbuilders[1].initial_state()

                        for entry, rentry in zip(conll_sentence, reversed(conll_sentence)):
                            blstm_forward = blstm_forward.add_input(entry.vec)
                            blstm_backward = blstm_backward.add_input(rentry.vec)

                            entry.lstms[1] = blstm_forward.output()
                            rentry.lstms[0] = blstm_backward.output()

                scores, exprs = self.__evaluate(conll_sentence)
                heads = decoder.parse_proj(scores)

                # Multiple roots: attach every root found after the first to
                # the previous root word
                rootCount = 0
                rootWid = -1
                for index, head in enumerate(heads):
                    if head == 0:
                        rootCount += 1
                        if rootCount == 1:
                            rootWid = index
                        if rootCount > 1:
                            heads[index] = rootWid
                            rootWid = index

                for entry, head, pos in zip(conll_sentence, heads, predicted_postags):
                    entry.pred_parent_id = head
                    entry.pred_relation = '_'
                    entry.pred_pos = pos

                dump = False

                if self.labelsFlag:
                    concat_layer = [self.__getRelVector(conll_sentence, head, modifier + 1) for modifier, head in
                                    enumerate(heads[1:])]
                    outputFFlayer = self.ffRelPredictor.predict_sequence(concat_layer)
                    predicted_rel_indices = [np.argmax(o.value()) for o in outputFFlayer]
                    predicted_rels = [self.irels[idx] for idx in predicted_rel_indices]
                    for modifier, head in enumerate(heads[1:]):
                        conll_sentence[modifier + 1].pred_relation = predicted_rels[modifier]

                renew_cg()
                if not dump:
                    yield sentence

    def Train(self, conll_path):
        eloss = 0.0
        mloss = 0.0
        eerrors = 0
        etotal = 0
        start = time.time()

        with open(conll_path, 'r') as conllFP:
            shuffledData = list(read_conll(conllFP, self.c2i))
            random.shuffle(shuffledData)

            errs = []
            lerrs = []
            posErrs = []

            for iSentence, sentence in enumerate(shuffledData):
                if iSentence % 500 == 0 and iSentence != 0:
                    print "Processing sentence number: %d" % iSentence, ", Loss: %.4f" % (
                                eloss / etotal), ", Time: %.2f" % (time.time() - start)
                    start = time.time()
                    eerrors = 0
                    eloss = 0.0
                    etotal = 0

                conll_sentence = [entry for entry in sentence if isinstance(entry, utils.ConllEntry)]

                for entry in conll_sentence:
                    # frequency-based word dropout: keep the word form with
                    # probability count / (count + 0.25)
                    c = float(self.wordsCount.get(entry.norm, 0))
                    dropFlag = (random.random() < (c / (0.25 + c)))
                    wordvec = self.wlookup[
                        int(self.vocab.get(entry.norm, 0)) if dropFlag else 0] if self.wdims > 0 else None

                    last_state = self.char_rnn.predict_sequence([self.clookup[char] for char in entry.idChars])[-1]
                    rev_last_state = self.char_rnn.predict_sequence(
                        [self.clookup[char] for char in reversed(entry.idChars)])[-1]

                    entry.vec = dynet.dropout(concatenate(filter(None, [wordvec, last_state, rev_last_state])), 0.33)

                    entry.pos_lstms = [entry.vec, entry.vec]
                    entry.headfov = None
                    entry.modfov = None

                    entry.rheadfov = None
                    entry.rmodfov = None

                #POS tagging loss
                lstm_forward = self.pos_builders[0].initial_state()
                lstm_backward = self.pos_builders[1].initial_state()
                for entry, rentry in zip(conll_sentence, reversed(conll_sentence)):
                    lstm_forward = lstm_forward.add_input(entry.vec)
                    lstm_backward = lstm_backward.add_input(rentry.vec)

                    entry.pos_lstms[1] = lstm_forward.output()
                    rentry.pos_lstms[0] = lstm_backward.output()

                for entry in conll_sentence:
                    entry.pos_vec = concatenate(entry.pos_lstms)

                blstm_forward = self.pos_bbuilders[0].initial_state()
                blstm_backward = self.pos_bbuilders[1].initial_state()

                for entry, rentry in zip(conll_sentence, reversed(conll_sentence)):
                    blstm_forward = blstm_forward.add_input(entry.pos_vec)
                    blstm_backward = blstm_backward.add_input(rentry.pos_vec)
                    entry.pos_lstms[1] = blstm_forward.output()
                    rentry.pos_lstms[0] = blstm_backward.output()

                concat_layer = [dynet.dropout(concatenate(entry.pos_lstms), 0.33) for entry in conll_sentence]
                outputFFlayer = self.ffSeqPredictor.predict_sequence(concat_layer)
                posIDs = [self.pos.get(entry.pos) for entry in conll_sentence]
                for pred, gold in zip(outputFFlayer, posIDs):
                    posErrs.append(self.pick_neg_log(pred, gold))

                # Add predicted pos tags
                for entry, poses in zip(conll_sentence, outputFFlayer):
                    entry.vec = concatenate([entry.vec, dynet.dropout(self.plookup[np.argmax(poses.value())], 0.33)])
                    entry.lstms = [entry.vec, entry.vec]

                #Parsing losses
                if self.blstmFlag:
                    lstm_forward = self.builders[0].initial_state()
                    lstm_backward = self.builders[1].initial_state()

                    for entry, rentry in zip(conll_sentence, reversed(conll_sentence)):
                        lstm_forward = lstm_forward.add_input(entry.vec)
                        lstm_backward = lstm_backward.add_input(rentry.vec)

                        entry.lstms[1] = lstm_forward.output()
                        rentry.lstms[0] = lstm_backward.output()

                    if self.bibiFlag:
                        for entry in conll_sentence:
                            entry.vec = concatenate(entry.lstms)

                        blstm_forward = self.bbuilders[0].initial_state()
                        blstm_backward = self.bbuilders[1].initial_state()

                        for entry, rentry in zip(conll_sentence, reversed(conll_sentence)):
                            blstm_forward = blstm_forward.add_input(entry.vec)
                            blstm_backward = blstm_backward.add_input(rentry.vec)

                            entry.lstms[1] = blstm_forward.output()
                            rentry.lstms[0] = blstm_backward.output()

                scores, exprs = self.__evaluate(conll_sentence)
                gold = [entry.parent_id for entry in conll_sentence]
                heads = decoder.parse_proj(scores, gold if self.costaugFlag else None)

                if self.labelsFlag:

                    concat_layer = [dynet.dropout(self.__getRelVector(conll_sentence, head, modifier + 1), 0.33) for
                                    modifier, head in enumerate(gold[1:])]
                    outputFFlayer = self.ffRelPredictor.predict_sequence(concat_layer)
                    relIDs = [self.rels[conll_sentence[modifier + 1].relation] for modifier, _ in enumerate(gold[1:])]
                    for pred, goldid in zip(outputFFlayer, relIDs):
                        lerrs.append(self.pick_neg_log(pred, goldid))

                e = sum([1 for h, g in zip(heads[1:], gold[1:]) if h != g])
                eerrors += e
                if e > 0:
                    loss = [(exprs[h][i] - exprs[g][i]) for i, (h, g) in enumerate(zip(heads, gold)) if h != g]  # * (1.0/float(e))
                    eloss += (e)
                    mloss += (e)
                    errs.extend(loss)

                etotal += len(conll_sentence)

                if iSentence % 1 == 0:  # batch size 1: update every sentence
                    if len(errs) > 0 or len(lerrs) > 0 or len(posErrs) > 0:
                        eerrs = (esum(errs + lerrs + posErrs))
                        eerrs.scalar_value()  # force the forward pass
                        eerrs.backward()
                        self.trainer.update()
                        errs = []
                        lerrs = []
                        posErrs = []

                    renew_cg()

        print "Loss: %.4f" % (mloss / iSentence)
Example 3
class jPosDepLearner:
    def __init__(self, vocab, pos, rels, w2i, c2i, caps, options):
        self.model = ParameterCollection()
        random.seed(1)
        self.trainer = AdamTrainer(self.model)
        #if options.learning_rate is not None:
        #    self.trainer = AdamTrainer(self.model, alpha=options.learning_rate)
        #    print("Adam initial learning rate:", options.learning_rate)
        self.activations = {
            'tanh': tanh,
            'sigmoid': logistic,
            'relu': rectify,
            'tanh3': (lambda x: tanh(cwise_multiply(cwise_multiply(x, x), x)))
        }
        self.activation = self.activations[options.activation]

        self.blstmFlag = options.blstmFlag
        self.labelsFlag = options.labelsFlag
        self.costaugFlag = options.costaugFlag
        self.bibiFlag = options.bibiFlag
        self.depFlag = options.depFlag
        self.sNerFlag = options.sNerFlag
        self.ldims = options.lstm_dims
        self.wdims = options.wembedding_dims
        self.cdims = options.cembedding_dims
        self.reldims = options.relembedding_dims
        self.layers = options.lstm_layers
        self.wordsCount = vocab
        self.vocab = {word: ind + 3 for word, ind in w2i.iteritems()}
        self.pos = {word: ind for ind, word in enumerate(pos)}
        self.id2pos = {ind: word for ind, word in enumerate(pos)}
        self.c2i = c2i
        self.caps = {word: ind for ind, word in enumerate(caps)}
        self.rels = {word: ind for ind, word in enumerate(rels)}
        self.irels = rels
        self.pdims = options.pembedding_dims
        self.vocab['*PAD*'] = 1
        self.vocab['*INITIAL*'] = 2
        self.wlookup = self.model.add_lookup_parameters(
            (len(vocab) + 3, self.wdims))
        self.clookup = self.model.add_lookup_parameters((len(c2i), self.cdims))
        self.plookup = self.model.add_lookup_parameters((len(pos), self.pdims))
        self.caps_lookup = self.model.add_lookup_parameters(
            (len(caps), self.cdims))
        # transition_array and the normalizeprobs helper below are defined but
        # never used; nertrans_lookup is initialized randomly by DyNet instead.
        transition_array = np.random.rand(len(pos) + 2, len(pos) + 2)

        #cap_array=np.random.rand(len(caps),len(pos))
        def normalizeprobs(arr):
            return np.array([np.divide(arr1, sum(arr1)) for arr1 in arr])

        self.nertrans_lookup = self.model.add_lookup_parameters(
            (len(pos) + 2, len(pos) + 2))
        #self.caplookup = self.model.lookup_parameters_from_numpy(normalizeprobs(cap_array))
        if options.external_embedding is not None:
            ext_embeddings, ext_emb_dim = load_embeddings_file(
                options.external_embedding, lower=True)
            assert (ext_emb_dim == self.wdims)
            print("Initializing word embeddings by pre-trained vectors")
            count = 0
            for word in self.vocab:
                _word = unicode(word, "utf-8")
                if _word in ext_embeddings:
                    count += 1
                    self.wlookup.init_row(self.vocab[word],
                                          ext_embeddings[_word])
            print("Vocab size: %d; #words having pretrained vectors: %d" %
                  (len(self.vocab), count))

        self.ffSeqPredictor = FFSequencePredictor(
            Layer(self.model, self.ldims * 2, len(self.pos), "idt"))

        self.hidden_units = options.hidden_units

        if not self.depFlag:

            self.pos_builders = [
                VanillaLSTMBuilder(1, self.wdims + self.cdims * 3, self.ldims,
                                   self.model),
                VanillaLSTMBuilder(1, self.wdims + self.cdims * 3, self.ldims,
                                   self.model)
            ]
            self.pos_bbuilders = [
                VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model),
                VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model)
            ]

            self.ffSeqPredictor = FFSequencePredictor(
                Layer(self.model, self.ldims * 2, len(self.pos), softmax))

            self.hidden_units = options.hidden_units

        if self.depFlag:

            if self.bibiFlag:
                self.builders = [
                    VanillaLSTMBuilder(1, self.wdims + self.cdims * 3,
                                       self.ldims, self.model),
                    VanillaLSTMBuilder(1, self.wdims + self.cdims * 3,
                                       self.ldims, self.model)
                ]
                self.bbuilders = [
                    VanillaLSTMBuilder(1, self.ldims * 2, self.ldims,
                                       self.model),
                    VanillaLSTMBuilder(1, self.ldims * 2, self.ldims,
                                       self.model)
                ]
            elif self.layers > 0:
                self.builders = [
                    VanillaLSTMBuilder(self.layers,
                                       self.wdims + self.cdims * 3, self.ldims,
                                       self.model),
                    VanillaLSTMBuilder(self.layers,
                                       self.wdims + self.cdims * 3, self.ldims,
                                       self.model)
                ]
            else:
                self.builders = [
                    SimpleRNNBuilder(1, self.wdims + self.cdims * 3,
                                     self.ldims, self.model),
                    SimpleRNNBuilder(1, self.wdims + self.cdims * 3,
                                     self.ldims, self.model)
                ]

            self.hidBias = self.model.add_parameters((self.ldims * 8))
            self.hidLayer = self.model.add_parameters(
                (self.hidden_units, self.ldims * 8))
            self.hid2Bias = self.model.add_parameters((self.hidden_units))

            self.outLayer = self.model.add_parameters(
                (1,
                 self.hidden_units if self.hidden_units > 0 else self.ldims *
                 8))

            if self.labelsFlag:
                self.rhidBias = self.model.add_parameters((self.ldims * 8))
                self.rhidLayer = self.model.add_parameters(
                    (self.hidden_units, self.ldims * 8))
                self.rhid2Bias = self.model.add_parameters((self.hidden_units))
                self.routLayer = self.model.add_parameters(
                    (len(self.irels), self.hidden_units
                     if self.hidden_units > 0 else self.ldims * 8))
                self.routBias = self.model.add_parameters((len(self.irels)))
                self.ffRelPredictor = FFSequencePredictor(
                    Layer(
                        self.model, self.hidden_units
                        if self.hidden_units > 0 else self.ldims * 8,
                        len(self.irels), softmax))

            if self.sNerFlag:
                self.sner_builders = [
                    VanillaLSTMBuilder(
                        1, self.wdims + self.cdims * 3 + self.reldims,
                        self.ldims, self.model),
                    VanillaLSTMBuilder(
                        1, self.wdims + self.cdims * 3 + self.reldims,
                        self.ldims, self.model)
                ]

                self.sner_bbuilders = [
                    VanillaLSTMBuilder(1, self.ldims * 2, self.ldims,
                                       self.model),
                    VanillaLSTMBuilder(1, self.ldims * 2, self.ldims,
                                       self.model)
                ]
                ##relation embeddings
                self.rellookup = self.model.add_lookup_parameters(
                    (len(self.rels), self.reldims))

        self.char_rnn = RNNSequencePredictor(
            LSTMBuilder(1, self.cdims, self.cdims, self.model))

    def __getExpr(self, sentence, i, j):

        if sentence[i].headfov is None:
            sentence[i].headfov = concatenate(
                [sentence[i].lstms[0], sentence[i].lstms[1]])
        if sentence[j].modfov is None:
            sentence[j].modfov = concatenate(
                [sentence[j].lstms[0], sentence[j].lstms[1]])

        _inputVector = concatenate([
            sentence[i].headfov, sentence[j].modfov,
            dynet.abs(sentence[i].headfov - sentence[j].modfov),
            dynet.cmult(sentence[i].headfov, sentence[j].modfov)
        ])

        if self.hidden_units > 0:
            output = self.outLayer.expr() * self.activation(
                self.hid2Bias.expr() + self.hidLayer.expr() *
                self.activation(_inputVector + self.hidBias.expr()))
        else:
            output = self.outLayer.expr() * self.activation(
                _inputVector + self.hidBias.expr())

        return output

    def __evaluate(self, sentence):
        exprs = [[
            self.__getExpr(sentence, i, j) for j in xrange(len(sentence))
        ] for i in xrange(len(sentence))]
        scores = np.array([[output.scalar_value() for output in exprsRow]
                           for exprsRow in exprs])

        return scores, exprs

    def pick_neg_log(self, pred, gold):
        return -dynet.log(dynet.pick(pred, gold))

    def pick_neg_log_2(self, pred_param, gold):
        gold_arr = inputVector(
            [1 if gold == i else 0 for i in range(len(self.pos) + 2)])
        x = scalarInput(1)
        pred_arr = softmax(pred_param * x)
        return -dynet.log(transpose(pred_arr) * gold_arr)
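
    # pick_neg_log_2 does the same for a raw parameter row: multiplying by
    # scalarInput(1) lifts the parameter into the computation graph, softmax
    # normalizes it, and the dot product with the one-hot gold_arr picks out
    # the gold probability, so the result is again -log P(gold).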

    def pick_gold_score(self, preds, golds):
        score = 0
        prev_tag = len(self.pos)
        for pred, gold in zip(preds, golds):
            score += dynet.pick(pred, gold) + dynet.pick(
                self.nertrans_lookup[gold], prev_tag)
            prev_tag = gold
        score += dynet.pick(self.nertrans_lookup[len(self.pos) + 1], prev_tag)
        return score
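
    # pick_gold_score accumulates the log-space score of the gold tag path:
    # the emission score of each gold tag plus the transition score from the
    # previous tag, starting from the start tag (index len(self.pos)) and
    # ending with the transition into the stop tag (index len(self.pos) + 1).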

    def pick_crf_score(self, preds, golds):
        # unnormalized CRF score of the gold path: pick_gold_score already
        # sums the gold emission and transition scores, so exponentiating it
        # gives the quantity the forward partition function normalizes.
        return dynet.exp(self.pick_gold_score(preds, golds))

    def forward_score(self, preds):
        def log_sum_exp(tag_score_arr):
            argmax = np.argmax(tag_score_arr.value())
            max_score = tag_score_arr[argmax]
            score = max_score
            max_arr = dynet.concatenate(
                [max_score for i in range(len(self.pos) + 2)])
            score += dynet.log(
                dynet.sum_dim(dynet.exp(tag_score_arr - max_arr), [0]))
            return score

        len1 = len(self.pos) + 2
        for_score = [-1e10 for _ in range(len1)]
        for_score[-2] = 0  # start tag
        for pred in preds:
            tag_scores = [dynet.scalarInput(-1e10) for _ in range(len1)]
            for tag, emit in enumerate(pred):
                tag_score = dynet.concatenate([
                    emit + dynet.pick(self.nertrans_lookup[tag], prev_tag) +
                    for_score[prev_tag] for prev_tag in range(len1)
                ])
                tag_scores[tag] = log_sum_exp(tag_score)
            for_score = tag_scores
            #print(dynet.concatenate(for_score).value())
        term_exp = dynet.concatenate([
            score + tr
            for score, tr in zip(for_score, self.nertrans_lookup[len(self.pos)
                                                                 + 1])
        ])
        term_score = log_sum_exp(term_exp)
        #print("score : %f"%term_score.value())
        return term_score
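
    # forward_score is the standard CRF forward recursion in log space:
    #   alpha_t(y) = emit_t(y) + logsumexp_y'( trans(y, y') + alpha_{t-1}(y') )
    # so exp(forward_score(preds)) is the partition function that normalizes
    # pick_crf_score(preds, golds) into a proper sequence probability.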

    def nextPerm(self, perm1, taglen):
        a = []
        for ind, x in enumerate(reversed(perm1)):
            if x < taglen - 1:
                for i in range(len(perm1) - ind - 1):
                    a.append(perm1[i])
                a.append(x + 1)
                for i in range(ind):
                    a.append(0)
                return a
        return -1
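
    # nextPerm steps through tag sequences like a base-`taglen` odometer, e.g.
    # taglen=3: [0, 0] -> [0, 1] -> [0, 2] -> [1, 0] -> ... -> [2, 2] -> -1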

    ## Brute force: enumerates every tag sequence (exponential time); only
    ## useful as a sanity check against forward_score.
    def forward_score2(self, taglen, senlen, preds):
        score = 0
        perm1 = [0 for i in range(senlen)]
        while perm1 != -1:
            score += self.pick_crf_score(preds, perm1)
            perm1 = self.nextPerm(perm1, taglen)
        return score

    def __getRelVector(self, sentence, i, j):
        if sentence[i].rheadfov is None:
            sentence[i].rheadfov = concatenate(
                [sentence[i].lstms[0], sentence[i].lstms[1]])
        if sentence[j].rmodfov is None:
            sentence[j].rmodfov = concatenate(
                [sentence[j].lstms[0], sentence[j].lstms[1]])
        _outputVector = concatenate([
            sentence[i].rheadfov, sentence[j].rmodfov,
            abs(sentence[i].rheadfov - sentence[j].rmodfov),
            cmult(sentence[i].rheadfov, sentence[j].rmodfov)
        ])

        if self.hidden_units > 0:
            return self.rhid2Bias.expr() + self.rhidLayer.expr(
            ) * self.activation(_outputVector + self.rhidBias.expr())
        else:
            return _outputVector

    def Save(self, filename):
        self.model.save(filename)

    def Load(self, filename):
        self.model.populate(filename)

    def Predict(self, conll_path, dep_epoch=1, ner_epoch=1):
        with open(conll_path, 'r') as conllFP:
            if ner_epoch == 0:
                read_conll_nerdep = read_conll_predict(conllFP, self.c2i,
                                                       self.wordsCount)
            else:
                read_conll_nerdep = read_conll_predict_ner(
                    conllFP, self.c2i, self.wordsCount)
            for iSentence, sentence in enumerate(read_conll_nerdep):
                conll_sentence = [
                    entry for entry in sentence
                    if isinstance(entry, utils.ConllEntry)
                ]

                for entry in conll_sentence:
                    capvec = self.caps_lookup[entry.capInfo]
                    wordvec = self.wlookup[int(self.vocab.get(
                        entry.norm, 0))] if self.wdims > 0 else None

                    last_state = self.char_rnn.predict_sequence(
                        [self.clookup[c] for c in entry.idChars])[-1]
                    rev_last_state = self.char_rnn.predict_sequence(
                        [self.clookup[c] for c in reversed(entry.idChars)])[-1]

                    entry.vec = concatenate(
                        filter(None,
                               [wordvec, last_state, rev_last_state, capvec]))
                    entry.vec2 = concatenate(
                        filter(None,
                               [wordvec, last_state, rev_last_state, capvec]))

                    entry.pos_lstms = [entry.vec, entry.vec]
                    entry.headfov = None
                    entry.modfov = None

                    entry.rheadfov = None
                    entry.rmodfov = None

                if not self.depFlag:

                    #Predicted pos tags
                    lstm_forward = self.pos_builders[0].initial_state()
                    lstm_backward = self.pos_builders[1].initial_state()
                    for entry, rentry in zip(conll_sentence,
                                             reversed(conll_sentence)):
                        lstm_forward = lstm_forward.add_input(entry.vec)
                        lstm_backward = lstm_backward.add_input(rentry.vec)

                        entry.pos_lstms[1] = lstm_forward.output()
                        rentry.pos_lstms[0] = lstm_backward.output()

                    for entry in conll_sentence:
                        entry.pos_vec = concatenate(entry.pos_lstms)

                    blstm_forward = self.pos_bbuilders[0].initial_state()
                    blstm_backward = self.pos_bbuilders[1].initial_state()

                    for entry, rentry in zip(conll_sentence,
                                             reversed(conll_sentence)):
                        blstm_forward = blstm_forward.add_input(entry.pos_vec)
                        blstm_backward = blstm_backward.add_input(
                            rentry.pos_vec)
                        entry.pos_lstms[1] = blstm_forward.output()
                        rentry.pos_lstms[0] = blstm_backward.output()

                    concat_layer = [
                        concatenate(entry.pos_lstms)
                        for entry in conll_sentence
                    ]
                    #cap_info_sentence=[self.caplookup[entry.capInfo] for entry in conll_sentence]
                    outputFFlayer = self.ffSeqPredictor.predict_sequence(
                        concat_layer)
                    best_parentids, bestscores = self.ffSeqPredictor.viterbi_sequence(
                        outputFFlayer, self.nertrans_lookup)
                    predicted_pos_indices = [
                        np.argmax(o.value()) for o in outputFFlayer
                    ]
                    root_predicted_postags = ["O"] + [
                        self.id2pos[idx] for idx in best_parentids
                    ]
                    if iSentence < 5:
                        for word, tag in zip(conll_sentence,
                                             root_predicted_postags):
                            print("word : {}  gold : {} pred : {}".format(
                                word.form, word.pos, tag))
                    for entry, pos in zip(conll_sentence,
                                          root_predicted_postags):
                        entry.pred_pos = pos
                    dump = False

                if self.depFlag:

                    # Add predicted pos tags for parsing prediction
                    #for entry, posid in zip(conll_sentence, viterbi_pred_tagids):
                    #    entry.vec = concatenate([entry.vec, self.plookup[posid]])
                    #    entry.lstms = [entry.vec, entry.vec]
                    for entry in conll_sentence:

                        entry.lstms = [entry.vec, entry.vec]

                    if self.blstmFlag:
                        lstm_forward = self.builders[0].initial_state()
                        lstm_backward = self.builders[1].initial_state()

                        for entry, rentry in zip(conll_sentence,
                                                 reversed(conll_sentence)):
                            lstm_forward = lstm_forward.add_input(entry.vec)
                            lstm_backward = lstm_backward.add_input(rentry.vec)
                            entry.lstms[1] = lstm_forward.output()
                            rentry.lstms[0] = lstm_backward.output()

                        if self.bibiFlag:
                            for entry in conll_sentence:
                                entry.vec = concatenate(entry.lstms)

                            blstm_forward = self.bbuilders[0].initial_state()
                            blstm_backward = self.bbuilders[1].initial_state()

                            for entry, rentry in zip(conll_sentence,
                                                     reversed(conll_sentence)):
                                blstm_forward = blstm_forward.add_input(
                                    entry.vec)
                                blstm_backward = blstm_backward.add_input(
                                    rentry.vec)

                                entry.lstms[1] = blstm_forward.output()
                                rentry.lstms[0] = blstm_backward.output()

                    scores, exprs = self.__evaluate(conll_sentence)
                    heads = decoder.parse_proj(scores)

                    # Multiple roots: re-attach every root after the first to the previous root
                    rootCount = 0
                    rootWid = -1
                    for index, head in enumerate(heads):
                        if head == 0:
                            rootCount += 1
                            if rootCount == 1:
                                rootWid = index
                            if rootCount > 1:
                                heads[index] = rootWid
                                rootWid = index
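
                    # Illustrative trace (assuming parse_proj returns -1 as the
                    # artificial root's own head): heads = [-1, 0, 1, 0, 3] has
                    # two tokens attached to 0; after this loop only the first
                    # keeps head 0 and the second is re-attached to it, giving
                    # [-1, 0, 1, 1, 3].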

                    for entry, head in zip(conll_sentence, heads):
                        entry.pred_parent_id = head
                        entry.pred_relation = '_'
                        #entry.pred_pos = pos

                    if self.labelsFlag:
                        concat_layer = [
                            self.__getRelVector(conll_sentence, head,
                                                modifier + 1)
                            for modifier, head in enumerate(heads[1:])
                        ]
                        outputFFlayer = self.ffRelPredictor.predict_sequence(
                            concat_layer)
                        predicted_rel_indices = [
                            np.argmax(o.value()) for o in outputFFlayer
                        ]
                        predicted_rels = [
                            self.irels[idx] for idx in predicted_rel_indices
                        ]
                        for modifier, head in enumerate(heads[1:]):
                            conll_sentence[modifier + 1].pred_relation = predicted_rels[modifier]

                    if self.sNerFlag and ner_epoch == 1:

                        conll_sentence[0].vec = concatenate([
                            conll_sentence[0].vec2,
                            self.rellookup[self.rels["rroot"]]
                        ])
                        for entry, pred in zip(conll_sentence[1:],
                                               predicted_rel_indices):
                            relvec = self.rellookup[pred]
                            # for entry, posid in zip(conll_sentence, viterbi_pred_tagids):
                            entry.vec = concatenate([entry.vec2, relvec])
                        for entry in conll_sentence:
                            entry.ner2_lstms = [entry.vec, entry.vec]

                        slstm_forward = self.sner_builders[0].initial_state()
                        slstm_backward = self.sner_builders[1].initial_state()

                        for entry, rentry in zip(conll_sentence,
                                                 reversed(conll_sentence)):
                            # Thread the biLSTM state through the loop (the
                            # original assigned to lstm_forward, so the state
                            # never advanced past the first input).
                            slstm_forward = slstm_forward.add_input(entry.vec)
                            slstm_backward = slstm_backward.add_input(rentry.vec)

                            entry.ner2_lstms[1] = slstm_forward.output()
                            rentry.ner2_lstms[0] = slstm_backward.output()

                        for entry in conll_sentence:
                            entry.ner2_vec = concatenate(entry.ner2_lstms)

                        sblstm_forward = self.sner_bbuilders[0].initial_state()
                        sblstm_backward = self.sner_bbuilders[1].initial_state()

                        for entry, rentry in zip(conll_sentence,
                                                 reversed(conll_sentence)):
                            blstm_forward = sblstm_forward.add_input(
                                entry.ner2_vec)
                            blstm_backward = sblstm_backward.add_input(
                                rentry.ner2_vec)

                            entry.ner2_lstms[1] = blstm_forward.output()
                            rentry.ner2_lstms[0] = blstm_backward.output()

                        concat_layer = [
                            dynet.dropout(concatenate(entry.ner2_lstms), 0.33)
                            for entry in conll_sentence
                        ]
                        outputFFlayer = self.ffSeqPredictor.predict_sequence(
                            concat_layer)
                        best_parentids, bestscores = self.ffSeqPredictor.viterbi_sequence(
                            outputFFlayer, self.nertrans_lookup)
                        predicted_pos_indices = [
                            np.argmax(o.value()) for o in outputFFlayer
                        ]
                        root_predicted_postags = ["O"] + [
                            self.id2pos[idx] for idx in best_parentids
                        ]
                        if iSentence < 1:
                            for word, tag in zip(conll_sentence,
                                                 root_predicted_postags):
                                print("word : {}  gold : {} pred : {}".format(
                                    word.form, word.pos, tag))
                        for entry, pos in zip(conll_sentence,
                                              root_predicted_postags):
                            entry.pred_pos = pos

                    dump = False

                renew_cg()
                if not dump:
                    yield sentence
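
    # Usage sketch (illustrative): Predict is a generator; each yielded
    # sentence's ConllEntry objects carry the predictions set above, e.g.
    #
    #   for sent in learner.Predict(test_path, dep_epoch=1, ner_epoch=1):
    #       for entry in sent:
    #           if isinstance(entry, utils.ConllEntry):
    #               print("{} {} {} {}".format(entry.form, entry.pred_parent_id,
    #                                          entry.pred_relation, entry.pred_pos))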

    def Train(self, conll_path, dep_epoch=0, ner_epoch=0):
        eloss = 0.0
        mloss = 0.0
        eerrors = 0
        etotal = 0
        start = time.time()
        with open(conll_path, 'r') as conllFP:
            if ner_epoch == 0:
                read_conll_nerdep = read_conll(conllFP, self.c2i)
            else:
                read_conll_nerdep = read_conll_ner(conllFP, self.c2i)
            shuffledData = list(read_conll_nerdep)
            random.shuffle(shuffledData)

            errs = []
            lerrs = []
            posErrs = 0
            postrErrs = []
            nertr2Errs = []
            ner2Errs = 0
            startind = 0
            e = 0
            for iSentence, sentence in enumerate(shuffledData):
                if iSentence % 500 == 0 and iSentence != 0:
                    print "Processing sentence number: %d" % iSentence, ", Loss: %.4f" % (
                        eloss / etotal), ", Time: %.2f" % (time.time() - start)
                    start = time.time()
                    eerrors = 0
                    eloss = 0.0
                    etotal = 0

                conll_sentence = [
                    entry for entry in sentence
                    if isinstance(entry, utils.ConllEntry)
                ]

                for entry in conll_sentence:
                    c = float(self.wordsCount.get(entry.norm, 0))
                    dropFlag = (random.random() < (c / (0.25 + c)))
                    capvec = self.caps_lookup[entry.capInfo]
                    wordvec = self.wlookup[
                        int(self.vocab.get(entry.norm, 0)
                            ) if dropFlag else 0] if self.wdims > 0 else None
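                    # Frequency-based word dropout: a word seen c times in
                    # training keeps its own embedding with probability
                    # c / (0.25 + c) and otherwise falls back to the UNK row
                    # (index 0).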

                    last_state = self.char_rnn.predict_sequence(
                        [self.clookup[c] for c in entry.idChars])[-1]
                    rev_last_state = self.char_rnn.predict_sequence(
                        [self.clookup[c] for c in reversed(entry.idChars)])[-1]

                    entry.vec = dynet.dropout(
                        concatenate(
                            filter(
                                None,
                                [wordvec, last_state, rev_last_state, capvec
                                 ])), 0.33)
                    entry.vec2 = entry.vec
                    entry.pos_lstms = [entry.vec, entry.vec]
                    entry.headfov = None
                    entry.modfov = None

                    entry.rheadfov = None
                    entry.rmodfov = None

                if not self.depFlag:

                    # NER tagging loss
                    lstm_forward = self.pos_builders[0].initial_state()
                    lstm_backward = self.pos_builders[1].initial_state()
                    for entry, rentry in zip(conll_sentence,
                                             reversed(conll_sentence)):
                        lstm_forward = lstm_forward.add_input(entry.vec)
                        lstm_backward = lstm_backward.add_input(rentry.vec)

                        entry.pos_lstms[1] = lstm_forward.output()
                        rentry.pos_lstms[0] = lstm_backward.output()

                    for entry in conll_sentence:
                        entry.pos_vec = concatenate(entry.pos_lstms)

                    blstm_forward = self.pos_bbuilders[0].initial_state()
                    blstm_backward = self.pos_bbuilders[1].initial_state()

                    for entry, rentry in zip(conll_sentence,
                                             reversed(conll_sentence)):
                        blstm_forward = blstm_forward.add_input(entry.pos_vec)
                        blstm_backward = blstm_backward.add_input(
                            rentry.pos_vec)

                        entry.pos_lstms[1] = blstm_forward.output()
                        rentry.pos_lstms[0] = blstm_backward.output()

                    concat_layer = [
                        dynet.dropout(concatenate(entry.pos_lstms), 0.33)
                        for entry in conll_sentence
                    ]
                    cap_info_sentence = [
                        self.caps_lookup[entry.capInfo]
                        for entry in conll_sentence
                    ]
                    outputFFlayer = self.ffSeqPredictor.predict_sequence(
                        concat_layer)
                    posIDs = [
                        self.pos.get(entry.pos) for entry in conll_sentence
                    ]
                    posErrs = (self.forward_score(outputFFlayer) -
                               self.pick_gold_score(outputFFlayer, posIDs))
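                    # CRF-style tagging loss: forward_score computes the log
                    # partition over all tag sequences and pick_gold_score the
                    # gold path's score, so posErrs is the negative
                    # log-likelihood of the gold sequence; ner2Errs further
                    # down is built the same way.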

                # Dependency parsing branch
                if self.depFlag:
                    # Add predicted ner tags
                    #for entry, poses in zip(conll_sentence, outputFFlayer):
                    #    entry.vec = concatenate([entry.vec, dynet.dropout(self.plookup[np.argmax(poses.value())], 0.33)])
                    for entry in conll_sentence:
                        entry.lstms = [entry.vec, entry.vec]

                    # Parsing losses
                    if self.blstmFlag:
                        lstm_forward = self.builders[0].initial_state()
                        lstm_backward = self.builders[1].initial_state()

                        for entry, rentry in zip(conll_sentence,
                                                 reversed(conll_sentence)):
                            lstm_forward = lstm_forward.add_input(entry.vec)
                            lstm_backward = lstm_backward.add_input(rentry.vec)

                            entry.lstms[1] = lstm_forward.output()
                            rentry.lstms[0] = lstm_backward.output()

                        if self.bibiFlag:
                            for entry in conll_sentence:
                                entry.vec = concatenate(entry.lstms)

                            blstm_forward = self.bbuilders[0].initial_state()
                            blstm_backward = self.bbuilders[1].initial_state()

                            for entry, rentry in zip(conll_sentence,
                                                     reversed(conll_sentence)):
                                blstm_forward = blstm_forward.add_input(
                                    entry.vec)
                                blstm_backward = blstm_backward.add_input(
                                    rentry.vec)

                                entry.lstms[1] = blstm_forward.output()
                                rentry.lstms[0] = blstm_backward.output()

                    scores, exprs = self.__evaluate(conll_sentence)
                    gold = [entry.parent_id for entry in conll_sentence]
                    heads = decoder.parse_proj(
                        scores, gold if self.costaugFlag else None)

                    if self.labelsFlag:

                        concat_layer = [
                            dynet.dropout(
                                self.__getRelVector(conll_sentence, head,
                                                    modifier + 1), 0.33)
                            for modifier, head in enumerate(gold[1:])
                        ]
                        outputFFlayer = self.ffRelPredictor.predict_sequence(
                            concat_layer)
                        if dep_epoch == 1:
                            relIDs = [
                                self.rels[conll_sentence[modifier +
                                                         1].relation]
                                for modifier, _ in enumerate(gold[1:])
                            ]
                            for pred, goldid in zip(outputFFlayer, relIDs):
                                lerrs.append(self.pick_neg_log(pred, goldid))
                    if dep_epoch == 1:
                        e = sum(
                            [1 for h, g in zip(heads[1:], gold[1:]) if h != g])

                    if self.sNerFlag and ner_epoch == 1:

                        conll_sentence[0].vec = concatenate([
                            conll_sentence[0].vec2,
                            self.rellookup[self.rels["rroot"]]
                        ])
                        for entry, pred in zip(conll_sentence[1:],
                                               outputFFlayer):
                            relvec = self.rellookup[np.argmax(pred.value())]
                            entry.vec = concatenate(
                                [entry.vec2,
                                 dynet.dropout(relvec, 0.33)])

                        for entry in conll_sentence:
                            entry.ner2_lstms = [entry.vec, entry.vec]

                        slstm_forward = self.sner_builders[0].initial_state()
                        slstm_backward = self.sner_builders[1].initial_state()

                        for entry, rentry in zip(conll_sentence,
                                                 reversed(conll_sentence)):
                            # Thread the biLSTM state through the loop (the
                            # original assigned to lstm_forward, so the state
                            # never advanced past the first input).
                            slstm_forward = slstm_forward.add_input(entry.vec)
                            slstm_backward = slstm_backward.add_input(rentry.vec)

                            entry.ner2_lstms[1] = slstm_forward.output()
                            rentry.ner2_lstms[0] = slstm_backward.output()

                        for entry in conll_sentence:
                            entry.ner2_vec = concatenate(entry.ner2_lstms)

                        sblstm_forward = self.sner_bbuilders[0].initial_state()
                        sblstm_backward = self.sner_bbuilders[1].initial_state()

                        for entry, rentry in zip(conll_sentence,
                                                 reversed(conll_sentence)):
                            blstm_forward = sblstm_forward.add_input(
                                entry.ner2_vec)
                            blstm_backward = sblstm_backward.add_input(
                                rentry.ner2_vec)

                            entry.ner2_lstms[1] = blstm_forward.output()
                            rentry.ner2_lstms[0] = blstm_backward.output()

                        concat_layer = [
                            dynet.dropout(concatenate(entry.ner2_lstms), 0.33)
                            for entry in conll_sentence
                        ]
                        outputFFlayer = self.ffSeqPredictor.predict_sequence(
                            concat_layer)
                        posIDs = [
                            self.pos.get(entry.pos) for entry in conll_sentence
                        ]
                        gold_score = self.pick_gold_score(
                            outputFFlayer, posIDs)
                        ner2Errs = (self.forward_score(outputFFlayer) -
                                    gold_score)

                    if iSentence < 5:
                        print("ner and dep loss")
                        print(ner2Errs.value() if ner2Errs != 0 else 0)
                        print(esum(lerrs).value() if lerrs else 0)

                eerrors += e
                if e > 0:
                    loss = [(exprs[h][i] - exprs[g][i])
                            for i, (h, g) in enumerate(zip(heads, gold))
                            if h != g]  # * (1.0/float(e))
                    eloss += (e)
                    mloss += (e)
                    errs.extend(loss)

                etotal += len(conll_sentence)

                if iSentence % 1 == 0:  # batch size 1: update after every sentence
                    if len(errs) > 0 or len(lerrs) > 0 or posErrs > 0 or len(
                            postrErrs) > 0 or ner2Errs > 0 or len(
                                nertr2Errs) > 0:
                        eerrs = 0
                        if len(errs + lerrs + postrErrs + nertr2Errs) > 0:
                            eerrs = esum(errs + lerrs + postrErrs + nertr2Errs)
                        eerrs += (posErrs + ner2Errs)
                        #print(eerrs.value())
                        eerrs.scalar_value()
                        eerrs.backward()
                        self.trainer.update()
                        errs = []
                        e = 0
                        lerrs = []
                        postrErrs = []
                        nertr2Errs = []
                        posErrs = 0
                        ner2Errs = 0

                    renew_cg()

        print "Loss: %.4f" % (mloss / iSentence)
Example n. 4
class jPosDepLearner:
    def __init__(self, vocab, pos, rels, w2i, c2i, m2i, t2i, morph_dict, options):
        self.model = ParameterCollection()
        random.seed(1)
        self.trainer = AdamTrainer(self.model)
        #if options.learning_rate is not None:
        #    self.trainer = AdamTrainer(self.model, alpha=options.learning_rate)
        #    print("Adam initial learning rate:", options.learning_rate)
        self.activations = {'tanh': tanh, 'sigmoid': logistic, 'relu': rectify,
                            'tanh3': (lambda x: tanh(cwise_multiply(cwise_multiply(x, x), x)))}
        self.activation = self.activations[options.activation]

        self.blstmFlag = options.blstmFlag
        self.labelsFlag = options.labelsFlag
        self.costaugFlag = options.costaugFlag
        self.bibiFlag = options.bibiFlag
        self.morphFlag = options.morphFlag
        self.goldMorphFlag = options.goldMorphFlag
        self.morphTagFlag = options.morphTagFlag
        self.goldMorphTagFlag = options.goldMorphTagFlag
        self.lowerCase = options.lowerCase
        self.mtag_encoding_composition_type = options.mtag_encoding_composition_type
        self.mtag_encoding_composition_alpha = options.mtag_encoding_composition_alpha

        self.ldims = options.lstm_dims
        self.wdims = options.wembedding_dims
        self.mdims = options.membedding_dims
        self.tdims = options.tembedding_dims
        self.cdims = options.cembedding_dims
        self.layers = options.lstm_layers
        self.wordsCount = vocab
        self.vocab = {word: ind + 3 for word, ind in iter(w2i.items())}
        self.pos = {word: ind for ind, word in enumerate(pos)}
        self.id2pos = {ind: word for ind, word in enumerate(pos)}
        self.c2i = c2i
        self.m2i = m2i
        self.t2i = t2i
        self.i2t = {t2i[i]:i for i in self.t2i}
        self.morph_dict = morph_dict
        self.rels = {word: ind for ind, word in enumerate(rels)}
        self.irels = rels
        self.pdims = options.pembedding_dims
        self.tagging_attention_size = options.tagging_att_size

        self.vocab['*PAD*'] = 1
        self.vocab['*INITIAL*'] = 2
        self.wlookup = self.model.add_lookup_parameters((len(vocab) + 3, self.wdims))
        self.clookup = self.model.add_lookup_parameters((len(c2i), self.cdims))
        self.plookup = self.model.add_lookup_parameters((len(pos), self.pdims))
        self.ext_embeddings = None

        if options.external_embedding is not None:
            ext_embeddings, ext_emb_dim = load_embeddings_file(options.external_embedding, lower=self.lowerCase, type=options.external_embedding_type)
            assert (ext_emb_dim == self.wdims)
            print("Initializing word embeddings by pre-trained vectors")
            count = 0
            for word in self.vocab:
                if word in ext_embeddings:
                    count += 1
                    self.wlookup.init_row(self.vocab[word], ext_embeddings[word])
            self.ext_embeddings = ext_embeddings
            print("Vocab size: %d; #words having pretrained vectors: %d" % (len(self.vocab), count))

        self.morph_dims = 2*2*self.mdims if self.morphFlag else 0
        self.mtag_dims = 2*self.tdims if self.morphTagFlag else 0
        self.pos_builders = [VanillaLSTMBuilder(1, self.wdims + self.cdims * 2 + self.morph_dims + self.mtag_dims, self.ldims, self.model),
                             VanillaLSTMBuilder(1, self.wdims + self.cdims * 2 + self.morph_dims + self.mtag_dims, self.ldims, self.model)]
        self.pos_bbuilders = [VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model),
                              VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model)]

        if self.bibiFlag:
            self.builders = [VanillaLSTMBuilder(1, self.wdims + self.cdims * 2 + self.morph_dims + self.mtag_dims + self.pdims, self.ldims, self.model),
                             VanillaLSTMBuilder(1, self.wdims + self.cdims * 2 + self.morph_dims + self.mtag_dims + self.pdims, self.ldims, self.model)]
            self.bbuilders = [VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model),
                              VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model)]
        elif self.layers > 0:
            self.builders = [VanillaLSTMBuilder(self.layers, self.wdims + self.cdims * 2 + self.morph_dims + self.mtag_dims + self.pdims, self.ldims, self.model),
                             VanillaLSTMBuilder(self.layers, self.wdims + self.cdims * 2 + self.morph_dims + self.mtag_dims + self.pdims, self.ldims, self.model)]
        else:
            self.builders = [SimpleRNNBuilder(1, self.wdims + self.cdims * 2 + self.morph_dims + self.mtag_dims, self.ldims, self.model),
                             SimpleRNNBuilder(1, self.wdims + self.cdims * 2 + self.morph_dims + self.mtag_dims, self.ldims, self.model)]

        self.ffSeqPredictor = FFSequencePredictor(Layer(self.model, self.ldims * 2, len(self.pos), softmax))

        self.hidden_units = options.hidden_units

        self.hidBias = self.model.add_parameters((self.ldims * 8))
        self.hidLayer = self.model.add_parameters((self.hidden_units, self.ldims * 8))
        self.hid2Bias = self.model.add_parameters((self.hidden_units))

        self.outLayer = self.model.add_parameters((1, self.hidden_units if self.hidden_units > 0 else self.ldims * 8))

        if self.labelsFlag:
            self.rhidBias = self.model.add_parameters((self.ldims * 8))
            self.rhidLayer = self.model.add_parameters((self.hidden_units, self.ldims * 8))
            self.rhid2Bias = self.model.add_parameters((self.hidden_units))
            self.routLayer = self.model.add_parameters(
                (len(self.irels), self.hidden_units if self.hidden_units > 0 else self.ldims * 8))
            self.routBias = self.model.add_parameters((len(self.irels)))
            self.ffRelPredictor = FFSequencePredictor(
                Layer(self.model, self.hidden_units if self.hidden_units > 0 else self.ldims * 8, len(self.irels),
                      softmax))

        self.char_rnn = RNNSequencePredictor(LSTMBuilder(1, self.cdims, self.cdims, self.model))

        if self.morphFlag:
            self.seg_lstm = [VanillaLSTMBuilder(1, self.cdims, self.cdims, self.model),
                                    VanillaLSTMBuilder(1, self.cdims, self.cdims, self.model)]
            self.seg_hidLayer = self.model.add_parameters((1, self.cdims*2))
            self.slookup = self.model.add_lookup_parameters((len(self.c2i), self.cdims))

            self.char_lstm = [VanillaLSTMBuilder(1, self.cdims, self.mdims, self.model),
                                    VanillaLSTMBuilder(1, self.cdims, self.mdims, self.model)]
            self.char_hidLayer = self.model.add_parameters((self.mdims, self.mdims*2))
            self.mclookup = self.model.add_lookup_parameters((len(self.c2i), self.cdims))

            self.morph_lstm = [VanillaLSTMBuilder(1, self.mdims*2, self.wdims, self.model),
                                VanillaLSTMBuilder(1, self.mdims*2, self.wdims, self.model)]
            self.morph_hidLayer = self.model.add_parameters((self.wdims, self.wdims*2))
            self.mlookup = self.model.add_lookup_parameters((len(m2i), self.mdims))

            self.morph_rnn = RNNSequencePredictor(LSTMBuilder(1, self.mdims*2, self.mdims*2, self.model))

        if self.morphTagFlag:
            # All weights for morpheme tagging are defined here.

            # Decoder
            self.dec_lstm = VanillaLSTMBuilder(1, 2 * self.cdims + self.tdims + self.cdims * 2, self.cdims, self.model)

            # Attention
            self.attention_w1 = self.model.add_parameters((self.tagging_attention_size, self.cdims * 2))
            self.attention_w2 = self.model.add_parameters((self.tagging_attention_size, self.cdims * 2))
            self.attention_v = self.model.add_parameters((1, self.tagging_attention_size))

            # Attention Context
            self.attention_w1_context = self.model.add_parameters((self.tagging_attention_size, self.cdims * 2))
            self.attention_w2_context = self.model.add_parameters((self.tagging_attention_size, self.cdims * 2))
            self.attention_v_context = self.model.add_parameters((1, self.tagging_attention_size))

            # MLP - Softmax
            self.decoder_w = self.model.add_parameters((len(t2i), self.cdims))
            self.decoder_b = self.model.add_parameters((len(t2i)))

            self.mtag_rnn = RNNSequencePredictor(VanillaLSTMBuilder(1, self.tdims, self.tdims, self.model))
            self.tlookup = self.model.add_lookup_parameters((len(t2i), self.tdims))
            if self.mtag_encoding_composition_type != "None":
                self.mtag_encoding_f_w = self.model.add_parameters((2 * self.tdims, 4 * self.tdims))
                self.mtag_encoding_f_b = self.model.add_parameters((2 * self.tdims))
                self.mtag_encoding_b_w = self.model.add_parameters((2 * self.tdims, 4 * self.tdims))
                self.mtag_encoding_b_b = self.model.add_parameters((2 * self.tdims))

    def initialize(self):
        if self.morphFlag and self.ext_embeddings:
            print("Initializing word embeddings by morph2vec")
            count = 0
            for word in self.vocab:
                if word not in self.ext_embeddings and word in self.morph_dict:
                    morph_seg = self.morph_dict[word]

                    count += 1
                    self.wlookup.init_row(self.vocab[word], self.__getWordVector(morph_seg).vec_value())
            print("Vocab size: %d; #missing words having generated vectors: %d" % (len(self.vocab), count))
            renew_cg()

    def __getExpr(self, sentence, i, j):

        if sentence[i].headfov is None:
            sentence[i].headfov = concatenate([sentence[i].lstms[0], sentence[i].lstms[1]])
        if sentence[j].modfov is None:
            sentence[j].modfov = concatenate([sentence[j].lstms[0], sentence[j].lstms[1]])

        _inputVector = concatenate(
            [sentence[i].headfov, sentence[j].modfov, dynet.abs(sentence[i].headfov - sentence[j].modfov),
             dynet.cmult(sentence[i].headfov, sentence[j].modfov)])

        if self.hidden_units > 0:
            output = self.outLayer.expr() * self.activation(
                self.hid2Bias.expr() + self.hidLayer.expr() * self.activation(
                    _inputVector + self.hidBias.expr()))
        else:
            output = self.outLayer.expr() * self.activation(_inputVector + self.hidBias.expr())

        return output
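
    # __getExpr scores one candidate head -> modifier arc from the feature
    # vector [h; m; |h - m|; h * m] (concatenation, absolute difference and
    # element-wise product of the two tokens' biLSTM states), optionally
    # through a hidden layer; __evaluate below assembles these scores into
    # the n x n matrix consumed by decoder.parse_proj.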

    def __evaluate(self, sentence):
        exprs = [[self.__getExpr(sentence, i, j) for j in range(len(sentence))] for i in range(len(sentence))]
        scores = np.array([[output.scalar_value() for output in exprsRow] for exprsRow in exprs])

        return scores, exprs

    def pick_neg_log(self, pred, gold):
        return -dynet.log(dynet.pick(pred, gold))

    def binary_crossentropy(self, pred, gold):
        return dynet.binary_log_loss(pred, gold)

    def cosine_proximity(self, pred, gold):
        def l2_normalize(x):
            square_sum = dynet.sqrt(dynet.bmax(dynet.sum_elems(dynet.square(x)), np.finfo(float).eps * dynet.ones((1))[0]))
            return dynet.cdiv(x, square_sum)

        y_pred = l2_normalize(pred)
        y_true = l2_normalize(gold)

        return -dynet.sum_elems(dynet.cmult(y_true, y_pred))
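
    # cosine_proximity is the negative cosine similarity
    # -(p . g) / (||p|| ||g||): it reaches -1 when the composed vector points
    # in the same direction as the gold embedding, and is used as the
    # morph2vec regression loss in Train_Morph below.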

    def __getRelVector(self, sentence, i, j):
        if sentence[i].rheadfov is None:
            sentence[i].rheadfov = concatenate([sentence[i].lstms[0], sentence[i].lstms[1]])
        if sentence[j].rmodfov is None:
            sentence[j].rmodfov = concatenate([sentence[j].lstms[0], sentence[j].lstms[1]])
        _outputVector = concatenate(
            [sentence[i].rheadfov, sentence[j].rmodfov, abs(sentence[i].rheadfov - sentence[j].rmodfov),
             cmult(sentence[i].rheadfov, sentence[j].rmodfov)])

        if self.hidden_units > 0:
            return self.rhid2Bias.expr() + self.rhidLayer.expr() * self.activation(
                _outputVector + self.rhidBias.expr())
        else:
            return _outputVector

    def __getSegmentationVector(self, word):
        slstm_forward = self.seg_lstm[0].initial_state()
        slstm_backward = self.seg_lstm[1].initial_state()

        seg_lstm_forward = slstm_forward.transduce([self.slookup[self.c2i[char] if char in self.c2i else 0] for char in word])
        seg_lstm_backward = slstm_backward.transduce([self.slookup[self.c2i[char] if char in self.c2i else 0] for char in reversed(word)])

        seg_vec = []
        for seg, rev_seg in zip(seg_lstm_forward,reversed(seg_lstm_backward)):
            seg_vec.append(dynet.logistic(self.seg_hidLayer.expr() * concatenate([seg,rev_seg])))

        seg_vec = concatenate(seg_vec)

        return seg_vec
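
    # __getSegmentationVector runs a character biLSTM over the word and maps
    # each position to a logistic score in (0, 1); downstream these scores are
    # handed to utils.generate_morphs, which (presumably) treats high scores
    # as morpheme boundaries when splitting the surface form.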

    def __getMorphVector(self, morph):
        clstm_forward = self.char_lstm[0].initial_state()
        clstm_backward = self.char_lstm[1].initial_state()

        char_lstm_forward = clstm_forward.transduce([self.mclookup[self.c2i[char] if char in self.c2i else 0] for char in morph] if len(morph) > 0 else [self.mclookup[0]])[-1]
        char_lstm_backward = clstm_backward.transduce([self.mclookup[self.c2i[char] if char in self.c2i else 0] for char in reversed(morph)] if len(morph) > 0 else [self.mclookup[0]])[-1]

        char_emb = self.char_hidLayer.expr() * concatenate([char_lstm_forward,char_lstm_backward])

        return concatenate([self.mlookup[self.m2i[morph] if morph in self.m2i else 0], char_emb])

    def __getWordVector(self, morph_seg):
        mlstm_forward = self.morph_lstm[0].initial_state()
        mlstm_backward = self.morph_lstm[1].initial_state()

        morph_lstm_forward = mlstm_forward.transduce([self.__getMorphVector(morph) for morph in morph_seg])[-1]
        morph_lstm_backward = mlstm_backward.transduce([self.__getMorphVector(morph) for morph in reversed(morph_seg)])[-1]

        morph_enc = concatenate([morph_lstm_forward, morph_lstm_backward])
        word_vec = self.morph_hidLayer.expr() * morph_enc

        return word_vec
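
    # __getWordVector composes a word embedding bottom-up: each morpheme is
    # encoded by __getMorphVector (a morpheme-embedding lookup concatenated
    # with a character-biLSTM summary), the morpheme sequence is read by a
    # biLSTM in both directions, and a linear layer maps the two final states
    # to a wdims-sized vector comparable to the rows of wlookup.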

    def attend(self, input_mat, state, w1dt):
        w2 = parameter(self.attention_w2)
        v = parameter(self.attention_v)

        # input_mat: (encoder_state x seqlen) => input vecs concatenated as cols
        # w1dt: (attdim x seqlen)
        # w2dt: (attdim,1)
        w2dt = w2 * concatenate(list(state.s()))
        # att_weights: (seqlen,) row vector
        # unnormalized: (seqlen,)
        unnormalized = transpose(v * tanh(colwise_add(w1dt, w2dt)))
        att_weights = softmax(unnormalized)
        # context: (encoder_state)
        context = input_mat * att_weights
        return context
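
    # attend is additive (Bahdanau-style) attention: for encoder columns h_i
    # (dimension 2*cdims) and the decoder's full state [c; h] from state.s(),
    #   score_i = v . tanh(W1 h_i + W2 [c; h]),  a = softmax(score),
    #   context = sum_i a_i h_i   (computed as input_mat * a).
    # attend_context below is the same computation with a separate parameter
    # set, applied to the sentence-level word-context matrix.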

    def attend_context(self, input_mat, state, w1dt_context):
        w2_context = parameter(self.attention_w2_context)
        v_context = parameter(self.attention_v_context)

        # input_mat: (encoder_state x seqlen) => input vecs concatenated as cols
        # w1dt: (attdim x seqlen)
        # w2dt: (attdim,1)
        w2dt_context = w2_context * concatenate(list(state.s()))
        # att_weights: (seqlen,) row vector
        # unnormalized: (seqlen,)
        unnormalized = transpose(v_context * tanh(colwise_add(w1dt_context, w2dt_context)))
        att_weights = softmax(unnormalized)
        # context: (encoder_state)
        context = input_mat * att_weights
        return context

    def decode(self, vectors, decoder_seq, word_context):
        w = parameter(self.decoder_w)
        b = parameter(self.decoder_b)
        w1 = parameter(self.attention_w1)

        w1_context = parameter(self.attention_w1_context)
        input_mat = concatenate_cols(vectors)
        input_context = concatenate_cols(word_context)

        w1dt = None
        w1dt_context = None

        last_output_embeddings = self.tlookup[self.t2i["<s>"]]
        s = self.dec_lstm.initial_state().add_input(concatenate([vecInput(self.cdims * 2),
                                                                    last_output_embeddings,
                                                                    vecInput(self.cdims * 2)]))
        loss = []

        for char in decoder_seq:
            # w1dt can be computed and cached once for the entire decoding phase
            w1dt = w1dt or w1 * input_mat
            w1dt_context = w1dt_context or w1_context * input_context
            vector = concatenate([self.attend(input_mat, s, w1dt),
                                     last_output_embeddings,
                                     self.attend_context(input_context, s, w1dt_context)])
            s = s.add_input(vector)
            out_vector = w * s.output() + b
            probs = softmax(out_vector)
            last_output_embeddings = self.tlookup[char]
            loss.append(-log(pick(probs, char)))
        loss = esum(loss)
        return loss
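
    # decode trains the tag decoder with teacher forcing: the loop feeds the
    # *gold* previous tag's embedding back in (last_output_embeddings is
    # updated from decoder_seq, not from the prediction) and sums the
    # per-step losses -log p(gold tag); generate below is the greedy,
    # feedback-from-argmax counterpart used at prediction time.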

    def __getLossMorphTagging(self, all_encoded_states, decoder_gold, word_context):
        return self.decode(all_encoded_states, decoder_gold, word_context)

    def generate(self, encoded, word_context):
        w = parameter(self.decoder_w)
        b = parameter(self.decoder_b)
        w1 = parameter(self.attention_w1)

        w1_context = parameter(self.attention_w1_context)

        input_mat = concatenate_cols(encoded)
        input_context = concatenate_cols(word_context)

        w1dt = None
        w1dt_context = None

        last_output_embeddings = self.tlookup[self.t2i["<s>"]]
        s = self.dec_lstm.initial_state().add_input(concatenate([vecInput(self.cdims * 2),
                                                                    last_output_embeddings,
                                                                    vecInput(self.cdims * 2)]))

        out = []
        count_EOS = 0
        limit_features = 10
        for i in range(limit_features):
            if count_EOS == 2: break
            # w1dt can be computed and cached once for the entire decoding phase
            w1dt = w1dt or w1 * input_mat
            w1dt_context = w1dt_context or w1_context * input_context
            vector = concatenate([self.attend(input_mat, s, w1dt),
                                     last_output_embeddings,
                                     self.attend_context(input_context, s, w1dt_context)])

            s = s.add_input(vector)
            out_vector = w * s.output() + b
            probs = softmax(out_vector).vec_value()
            next_char = probs.index(max(probs))
            last_output_embeddings = self.tlookup[next_char]
            if next_char == self.t2i["<s>"]:
                count_EOS += 1
            out.append(next_char)
        return out
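
    # generate decodes greedily: it feeds back the argmax tag embedding at
    # each step, emits at most limit_features (10) tags, and stops once "<s>"
    # has been produced twice, since the same symbol serves as both start and
    # end marker here.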

    def Save(self, filename):
        self.model.save(filename)

    def Load(self, filename):
        self.model.populate(filename)

    def Predict(self, conll_path):
        with open(conll_path, 'r') as conllFP:
            for iSentence, sentence in enumerate(read_conll(conllFP, self.c2i, self.m2i, self.t2i, self.morph_dict)):
                conll_sentence = [entry for entry in sentence if isinstance(entry, utils.ConllEntry)]

                if self.morphTagFlag:
                    sentence_context = []
                    last_state_char = self.char_rnn.predict_sequence([self.clookup[self.c2i["<start>"]]])[-1]
                    rev_last_state_char = self.char_rnn.predict_sequence([self.clookup[self.c2i["<start>"]]])[-1]
                    sentence_context.append(concatenate([last_state_char, rev_last_state_char]))
                    for entry in conll_sentence:
                        last_state_char = self.char_rnn.predict_sequence([self.clookup[c] for c in entry.idChars])
                        rev_last_state_char = self.char_rnn.predict_sequence([self.clookup[c] for c in reversed(entry.idChars)])
                        entry.char_rnn_states = [concatenate([f,b]) for f,b in zip(last_state_char, rev_last_state_char)]
                        sentence_context.append(entry.char_rnn_states[-1])

                for idx, entry in enumerate(conll_sentence):
                    wordvec = self.wlookup[int(self.vocab.get(entry.norm, 0))] if self.wdims > 0 else None

                    if self.morphTagFlag:
                        entry.vec = concatenate([wordvec, entry.char_rnn_states[-1]])
                    else:
                        last_state_char = self.char_rnn.predict_sequence([self.clookup[c] for c in entry.idChars])[-1]
                        rev_last_state_char = self.char_rnn.predict_sequence([self.clookup[c] for c in reversed(entry.idChars)])[-1]
                        entry.vec = concatenate([wordvec, last_state_char, rev_last_state_char])
                
                for idx, entry in enumerate(conll_sentence):
                    if self.morphFlag:
                        if len(entry.norm) > 2:
                            if self.goldMorphFlag:
                                seg_vec = self.__getSegmentationVector(entry.norm)
                                seg_vec = dynet.vecInput(seg_vec.dim()[0][0])
                                seg_vec.set(entry.idMorphs)
                                morph_seg = utils.generate_morphs(entry.norm, seg_vec.vec_value())
                                entry.pred_seg = morph_seg
                            else:
                                seg_vec = self.__getSegmentationVector(entry.norm)
                                morph_seg = utils.generate_morphs(entry.norm, seg_vec.vec_value())
                                entry.pred_seg = seg_vec.vec_value()
                        else:
                            morph_seg = [entry.norm]
                            entry.pred_seg = entry.idMorphs

                        entry.seg = entry.idMorphs

                        last_state_morph = self.morph_rnn.predict_sequence(
                            [self.__getMorphVector(morph) for morph in morph_seg])[-1]
                        rev_last_state_morph = self.morph_rnn.predict_sequence(
                            [self.__getMorphVector(morph) for morph in reversed(morph_seg)])[-1]

                        entry.vec = concatenate([entry.vec, last_state_morph, rev_last_state_morph])
                
                morphtag_encodings = []
                for idx, entry in enumerate(conll_sentence):
                    if self.morphTagFlag:
                        if self.goldMorphTagFlag:
                            morph_tags = entry.idMorphTags
                            entry.pred_tags = entry.idMorphTags
                            entry.pred_tags_tokens = [self.i2t[m_tag_id] for m_tag_id in entry.pred_tags]
                        else:                                                    
                            word_context = [c for i, c in enumerate(sentence_context) if i - 1 != idx]
                            entry.pred_tags = self.generate(entry.char_rnn_states, word_context)
                            morph_tags = entry.pred_tags
                            entry.tags = entry.idMorphTags
                            entry.pred_tags_tokens = [self.i2t[m_tag_id] for m_tag_id in entry.pred_tags]

                        last_state_mtag = self.mtag_rnn.predict_sequence([self.tlookup[t] for t in morph_tags])[-1]
                        rev_last_state_mtag = self.mtag_rnn.predict_sequence([self.tlookup[t] for t in reversed(morph_tags)])[-1]
                        current_encoding_mtag = concatenate([last_state_mtag, rev_last_state_mtag])  
                        morphtag_encodings.append(current_encoding_mtag)

                if self.morphTagFlag:
                    forward = []
                    for idx, encoding in enumerate(morphtag_encodings):
                        if idx == 0:
                            forward.append(encoding)
                        else:
                            updated = morphtag_encodings[idx-1]*self.mtag_encoding_composition_alpha \
                                    + encoding*(1-self.mtag_encoding_composition_alpha)
                            forward.append(updated)
                    if self.mtag_encoding_composition_type == "w_sum":
                        upper_morphtag_encodings = forward
                    elif self.mtag_encoding_composition_type == "bi_w_sum":
                        backward = []
                        for idx, r_encoding in enumerate(morphtag_encodings):
                            if idx == len(morphtag_encodings) - 1:
                                backward.append(r_encoding)
                            else:
                                updated = morphtag_encodings[idx+1]*self.mtag_encoding_composition_alpha \
                                        + r_encoding*(1-self.mtag_encoding_composition_alpha)
                                backward.append(updated)
                        upper_morphtag_encodings = [f+b for f,b in zip(forward, backward)]
                    elif self.mtag_encoding_composition_type == "bi_mlp":
                        forward = []
                        backward = []
                        for idx, encoding in enumerate(morphtag_encodings):
                            if idx != 0:
                                f = self.mtag_encoding_f_w * concatenate([encoding, morphtag_encodings[idx-1]]) \
                                            + self.mtag_encoding_f_b
                                forward.append(f)
                            else:
                                forward.append(encoding)
                            if idx != len(morphtag_encodings) - 1:
                                b = self.mtag_encoding_b_w * concatenate([encoding, morphtag_encodings[idx+1]]) \
                                            + self.mtag_encoding_b_b
                                backward.append(b)
                            else:
                                backward.append(encoding)
                        upper_morphtag_encodings = [f+b for f,b in zip(forward, backward)]
                    else:
                        upper_morphtag_encodings = morphtag_encodings

                    for entry, mtag in zip(conll_sentence, upper_morphtag_encodings):
                        entry.vec = concatenate([entry.vec, mtag])
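
                # A sketch of the three composition modes above, with
                # alpha = mtag_encoding_composition_alpha and encodings e_i:
                #   w_sum:    e'_i = alpha * e_{i-1} + (1 - alpha) * e_i
                #   bi_w_sum: e'_i = f_i + b_i, where b_i mixes e_{i+1} in the
                #             same way in the backward direction
                #   bi_mlp:   f_i = W_f [e_i; e_{i-1}] + b_f (mirrored for
                #             b_i), then e'_i = f_i + b_i
                # any other value leaves the encodings unchanged.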


                for idx, entry in enumerate(conll_sentence):
                    entry.pos_lstms = [entry.vec, entry.vec]
                    entry.headfov = None
                    entry.modfov = None

                    entry.rheadfov = None
                    entry.rmodfov = None

                # Predicted pos tags
                lstm_forward = self.pos_builders[0].initial_state()
                lstm_backward = self.pos_builders[1].initial_state()
                for entry, rentry in zip(conll_sentence, reversed(conll_sentence)):
                    lstm_forward = lstm_forward.add_input(entry.vec)
                    lstm_backward = lstm_backward.add_input(rentry.vec)

                    entry.pos_lstms[1] = lstm_forward.output()
                    rentry.pos_lstms[0] = lstm_backward.output()

                for entry in conll_sentence:
                    entry.pos_vec = concatenate(entry.pos_lstms)

                blstm_forward = self.pos_bbuilders[0].initial_state()
                blstm_backward = self.pos_bbuilders[1].initial_state()

                for entry, rentry in zip(conll_sentence, reversed(conll_sentence)):
                    blstm_forward = blstm_forward.add_input(entry.pos_vec)
                    blstm_backward = blstm_backward.add_input(rentry.pos_vec)
                    entry.pos_lstms[1] = blstm_forward.output()
                    rentry.pos_lstms[0] = blstm_backward.output()

                concat_layer = [concatenate(entry.pos_lstms) for entry in conll_sentence]
                outputFFlayer = self.ffSeqPredictor.predict_sequence(concat_layer)
                predicted_pos_indices = [np.argmax(o.value()) for o in outputFFlayer]
                predicted_postags = [self.id2pos[idx] for idx in predicted_pos_indices]

                # Add predicted pos tags for parsing prediction
                for entry, posid in zip(conll_sentence, predicted_pos_indices):
                    entry.vec = concatenate([entry.vec, self.plookup[posid]])
                    entry.lstms = [entry.vec, entry.vec]

                if self.blstmFlag:
                    lstm_forward = self.builders[0].initial_state()
                    lstm_backward = self.builders[1].initial_state()

                    for entry, rentry in zip(conll_sentence, reversed(conll_sentence)):
                        lstm_forward = lstm_forward.add_input(entry.vec)
                        lstm_backward = lstm_backward.add_input(rentry.vec)

                        entry.lstms[1] = lstm_forward.output()
                        rentry.lstms[0] = lstm_backward.output()

                    if self.bibiFlag:
                        for entry in conll_sentence:
                            entry.vec = concatenate(entry.lstms)

                        blstm_forward = self.bbuilders[0].initial_state()
                        blstm_backward = self.bbuilders[1].initial_state()

                        for entry, rentry in zip(conll_sentence, reversed(conll_sentence)):
                            blstm_forward = blstm_forward.add_input(entry.vec)
                            blstm_backward = blstm_backward.add_input(rentry.vec)

                            entry.lstms[1] = blstm_forward.output()
                            rentry.lstms[0] = blstm_backward.output()

                scores, exprs = self.__evaluate(conll_sentence)
                heads = decoder.parse_proj(scores)

                # Multiple roots: re-attach every root after the first to the previous root
                rootCount = 0
                rootWid = -1
                for index, head in enumerate(heads):
                    if head == 0:
                        rootCount += 1
                        if rootCount == 1:
                            rootWid = index
                        if rootCount > 1:
                            heads[index] = rootWid
                            rootWid = index

                for entry, head, pos in zip(conll_sentence, heads, predicted_postags):
                    entry.pred_parent_id = head
                    entry.pred_relation = '_'
                    entry.pred_pos = pos

                dump = False

                if self.labelsFlag:
                    concat_layer = [self.__getRelVector(conll_sentence, head, modifier + 1) for modifier, head in
                                    enumerate(heads[1:])]
                    outputFFlayer = self.ffRelPredictor.predict_sequence(concat_layer)
                    predicted_rel_indices = [np.argmax(o.value()) for o in outputFFlayer]
                    predicted_rels = [self.irels[idx] for idx in predicted_rel_indices]
                    for modifier, head in enumerate(heads[1:]):
                        conll_sentence[modifier + 1].pred_relation = predicted_rels[modifier]

                renew_cg()
                if not dump:
                    yield sentence

    def morph2word(self, morph_dict):
        word_emb = {}
        for word in morph_dict.keys():
            morph_seg = morph_dict[word]

            word_vec = self.__getWordVector(morph_seg)
            word_emb[word] = word_vec.vec_value()
        renew_cg()
        return word_emb

    def morph(self):
        morph_dict = {}
        for morph in self.m2i.keys():
            morph_dict[morph] = self.__getMorphVector(morph).vec_value()
        renew_cg()
        return morph_dict

    def Train_Morph(self):
        self.trainer.set_sparse_updates(False)
        start = time.time()
        for iWord, word in enumerate(list(self.morph_dict.keys())):
            if iWord % 2000 == 0 and iWord != 0:
                print("Processing word number: %d" % iWord, ", Time: %.2f" % (time.time() - start))
                start = time.time()

            morph_seg = self.morph_dict[word]
            morph_vec = self.__getWordVector(morph_seg)

            if self.ext_embeddings is None:
                vec_gold = self.wlookup[int(self.vocab.get(word, 0))].vec_value()
            elif word in self.ext_embeddings:
                vec_gold = self.ext_embeddings[word]
            else:
                vec_gold = None

            if vec_gold is not None:
                y_gold = dynet.vecInput(self.wdims)
                y_gold.set(vec_gold)
                mErrs = self.cosine_proximity(morph_vec, y_gold)
                mErrs.backward()
                self.trainer.update()
            renew_cg()
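
    # Train_Morph pre-trains the morpheme composition (morph2vec): for every
    # word with a known segmentation it composes a vector via __getWordVector
    # and pulls it towards a gold target under the cosine-proximity loss; the
    # target is the word's pretrained external embedding if one exists, the
    # word's own wlookup row when no external embeddings were loaded, and the
    # word is skipped otherwise.
    #
    # NOTE: the helpers below (embed_word, run_lstm, encode_word) reference
    # self.input_lookup, self.enc_fwd_lstm and self.enc_bwd_lstm, which are
    # never initialized in this class; they look like leftovers from a
    # generic encoder-decoder example and are not called anywhere here.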

    def embed_word(self, word):
        return [self.input_lookup[char] for char in word]

    def run_lstm(self, init_state, input_vecs):
        s = init_state
        out_vectors = []
        for vector in input_vecs:
            s = s.add_input(vector)
            out_vector = s.output()
            out_vectors.append(out_vector)
        return out_vectors

    def encode_word(self, word):
        word_rev = list(reversed(word))
        fwd_vectors = self.run_lstm(self.enc_fwd_lstm.initial_state(), word)
        bwd_vectors = self.run_lstm(self.enc_bwd_lstm.initial_state(), word_rev)
        bwd_vectors = list(reversed(bwd_vectors))
        vectors = [concatenate(list(p)) for p in zip(fwd_vectors, bwd_vectors)]
        return vectors

    def Train(self, conll_path):
        self.trainer.set_sparse_updates(True)
        eloss = 0.0
        mloss = 0.0
        eerrors = 0
        etotal = 0
        start = time.time()

        with open(conll_path, 'r') as conllFP:
            shuffledData = list(read_conll(conllFP, self.c2i, self.m2i, self.t2i, self.morph_dict))
            random.shuffle(shuffledData)

            errs = []
            lerrs = []
            posErrs = []
            segErrs = []
            mTagErrs = []

            for iSentence, sentence in enumerate(shuffledData):
                if iSentence % 500 == 0 and iSentence != 0:
                    print("Processing sentence number: %d" % iSentence, ", Loss: %.4f" % (
                                eloss / etotal), ", Time: %.2f" % (time.time() - start))
                    start = time.time()
                    eerrors = 0
                    eloss = 0.0
                    etotal = 0

                conll_sentence = [entry for entry in sentence if isinstance(entry, utils.ConllEntry)]

                if self.morphTagFlag:
                    sentence_context = []
                    last_state_char = self.char_rnn.predict_sequence([self.clookup[self.c2i["<start>"]]])[-1]
                    rev_last_state_char = self.char_rnn.predict_sequence([self.clookup[self.c2i["<start>"]]])[-1]
                    sentence_context.append(concatenate([last_state_char, rev_last_state_char]))
                    for entry in conll_sentence:
                        last_state_char = self.char_rnn.predict_sequence([self.clookup[c] for c in entry.idChars])
                        rev_last_state_char = self.char_rnn.predict_sequence([self.clookup[c] for c in reversed(entry.idChars)])
                        entry.char_rnn_states = [concatenate([f,b]) for f,b in zip(last_state_char, rev_last_state_char)]
                        sentence_context.append(entry.char_rnn_states[-1])

                for idx, entry in enumerate(conll_sentence):
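                    # Frequency-based word dropout (as in Kiperwasser & Goldberg's
                    # BIST parser): keep the word with probability c / (0.25 + c),
                    # where c is its training count, so rare words often train as the
                    # UNK row (index 0) and the model learns to lean on char features.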
                    c = float(self.wordsCount.get(entry.norm, 0))
                    dropFlag = (random.random() < (c / (0.25 + c)))
                    wordvec = self.wlookup[
                        int(self.vocab.get(entry.norm, 0)) if dropFlag else 0] if self.wdims > 0 else None
                    if self.morphTagFlag:
                        entry.vec = dynet.dropout(concatenate([wordvec, entry.char_rnn_states[-1]]), 0.33)
                    else:
                        last_state_char = self.char_rnn.predict_sequence([self.clookup[c] for c in entry.idChars])[-1]
                        rev_last_state_char = self.char_rnn.predict_sequence([self.clookup[c] for c in reversed(entry.idChars)])[-1]
                        entry.vec = dynet.dropout(concatenate([wordvec, last_state_char, rev_last_state_char]), 0.33)

                for idx, entry in enumerate(conll_sentence):
                    if self.morphFlag:
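                        # Segmentation: __getSegmentationVector scores boundary
                        # positions over entry.norm and generate_morphs splits the
                        # word accordingly. With gold morphology the predicted vector
                        # is used only to size the input before being overwritten by
                        # entry.idMorphs; otherwise a binary cross-entropy term
                        # against the gold boundaries is appended to segErrs.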
                        if len(entry.norm) > 2:
                            if self.goldMorphFlag:
                                seg_vec = self.__getSegmentationVector(entry.norm)
                                seg_vec = dynet.vecInput(seg_vec.dim()[0][0])
                                seg_vec.set(entry.idMorphs)
                                morph_seg = utils.generate_morphs(entry.norm, seg_vec.vec_value())
                            else:
                                seg_vec = self.__getSegmentationVector(entry.norm)
                                morph_seg = utils.generate_morphs(entry.norm, seg_vec.vec_value())
                                vec_gold = dynet.vecInput(seg_vec.dim()[0][0])
                                vec_gold.set(entry.idMorphs)
                                segErrs.append(self.binary_crossentropy(seg_vec, vec_gold))
                        else:
                            morph_seg = [entry.norm]

                        last_state_morph = self.morph_rnn.predict_sequence([self.__getMorphVector(morph) for morph in morph_seg])[-1]
                        rev_last_state_morph = self.morph_rnn.predict_sequence([self.__getMorphVector(morph) for morph in reversed(morph_seg)])[-1]
                        encoding_morph = concatenate([last_state_morph, rev_last_state_morph])
                        entry.vec = concatenate([entry.vec, dynet.dropout(encoding_morph, 0.33)])

                morphtag_encodings = []
                for idx, entry in enumerate(conll_sentence):
                    if self.morphTagFlag:
                        if self.goldMorphTagFlag:
                            morph_tags = entry.idMorphTags
                        else:
                            word_context = [c for i, c in enumerate(sentence_context) if i-1 != idx]
                            mTagErrs.append(
                                self.__getLossMorphTagging(entry.char_rnn_states, entry.idMorphTags, word_context))
                            predicted_sequence = self.generate(entry.char_rnn_states, word_context)
                            morph_tags = predicted_sequence

                        last_state_mtag = self.mtag_rnn.predict_sequence([self.tlookup[t] for t in morph_tags])[-1]
                        rev_last_state_mtag = self.mtag_rnn.predict_sequence([self.tlookup[t] for t in reversed(morph_tags)])[-1]
                        current_encoding_mtag = concatenate([last_state_mtag, rev_last_state_mtag])
                        morphtag_encodings.append(current_encoding_mtag)
        
                if self.morphTagFlag:
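                    # Compose neighbouring morph-tag encodings before appending them
                    # to the word vectors; mtag_encoding_composition_type selects
                    # one of three schemes (a = mtag_encoding_composition_alpha):
                    #   w_sum    : forward[i] = a * enc[i-1] + (1 - a) * enc[i]
                    #   bi_w_sum : the same in both directions, summed element-wise
                    #   bi_mlp   : a learned linear map over [enc[i]; enc[i +/- 1]]
                    # Any other value keeps the raw encodings.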
                    forward = []
                    for idx, encoding in enumerate(morphtag_encodings):
                        if idx == 0:
                            forward.append(encoding)
                        else:
                            updated = morphtag_encodings[idx-1]*self.mtag_encoding_composition_alpha \
                                    + encoding*(1-self.mtag_encoding_composition_alpha)
                            forward.append(updated)
                    if self.mtag_encoding_composition_type == "w_sum":
                        upper_morphtag_encodings = forward
                    elif self.mtag_encoding_composition_type == "bi_w_sum":
                        backward = []
                        for idx, r_encoding in enumerate(morphtag_encodings):
                            if idx == len(morphtag_encodings) - 1:
                                backward.append(r_encoding)
                            else:
                                updated = morphtag_encodings[idx+1]*self.mtag_encoding_composition_alpha \
                                        + r_encoding*(1-self.mtag_encoding_composition_alpha)
                                backward.append(updated)
                        upper_morphtag_encodings = [f+b for f,b in zip(forward, backward)]   
                    elif  self.mtag_encoding_composition_type == "bi_mlp":
                        forward = []
                        backward = []
                        for idx, encoding in enumerate(morphtag_encodings):
                            if idx != 0:
                                f = self.mtag_encoding_f_w * concatenate([encoding, morphtag_encodings[idx-1]]) \
                                            + self.mtag_encoding_f_b
                                forward.append(f)
                            else:
                                forward.append(encoding)
                            if idx != len(morphtag_encodings) - 1:
                                b = self.mtag_encoding_b_w * concatenate([encoding, morphtag_encodings[idx+1]]) \
                                            + self.mtag_encoding_b_b
                                backward.append(b)
                            else:
                                backward.append(encoding)
                        upper_morphtag_encodings = [f+b for f,b in zip(forward, backward)]
                    else:
                        upper_morphtag_encodings = morphtag_encodings
                    for entry, mtag in zip(conll_sentence, upper_morphtag_encodings):
                        entry.vec = concatenate([entry.vec, dynet.dropout(mtag, 0.33)])

                for idx, entry in enumerate(conll_sentence):
                    entry.pos_lstms = [entry.vec, entry.vec]
                    entry.headfov = None
                    entry.modfov = None

                    entry.rheadfov = None
                    entry.rmodfov = None

                #POS tagging loss
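                # Two stacked BiLSTMs over the token vectors feed a per-token softmax;
                # pick_neg_log(pred, gold) is plain cross-entropy, -log pred[gold].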
                lstm_forward = self.pos_builders[0].initial_state()
                lstm_backward = self.pos_builders[1].initial_state()
                for entry, rentry in zip(conll_sentence, reversed(conll_sentence)):
                    lstm_forward = lstm_forward.add_input(entry.vec)
                    lstm_backward = lstm_backward.add_input(rentry.vec)

                    entry.pos_lstms[1] = lstm_forward.output()
                    rentry.pos_lstms[0] = lstm_backward.output()

                for entry in conll_sentence:
                    entry.pos_vec = concatenate(entry.pos_lstms)

                blstm_forward = self.pos_bbuilders[0].initial_state()
                blstm_backward = self.pos_bbuilders[1].initial_state()

                for entry, rentry in zip(conll_sentence, reversed(conll_sentence)):
                    blstm_forward = blstm_forward.add_input(entry.pos_vec)
                    blstm_backward = blstm_backward.add_input(rentry.pos_vec)
                    entry.pos_lstms[1] = blstm_forward.output()
                    rentry.pos_lstms[0] = blstm_backward.output()

                concat_layer = [dynet.dropout(concatenate(entry.pos_lstms), 0.33) for entry in conll_sentence]
                outputFFlayer = self.ffSeqPredictor.predict_sequence(concat_layer)
                posIDs = [self.pos.get(entry.pos) for entry in conll_sentence]
                for pred, gold in zip(outputFFlayer, posIDs):
                    posErrs.append(self.pick_neg_log(pred, gold))

                # Add predicted pos tags
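                # The tagger's argmax prediction (not the gold tag) is embedded via
                # plookup and appended, so the parser trains on the same POS signal
                # it will see at test time.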
                for entry, poses in zip(conll_sentence, outputFFlayer):
                    entry.vec = concatenate([entry.vec, dynet.dropout(self.plookup[np.argmax(poses.value())], 0.33)])
                    entry.lstms = [entry.vec, entry.vec]

                #Parsing losses
                if self.blstmFlag:
                    lstm_forward = self.builders[0].initial_state()
                    lstm_backward = self.builders[1].initial_state()

                    for entry, rentry in zip(conll_sentence, reversed(conll_sentence)):
                        lstm_forward = lstm_forward.add_input(entry.vec)
                        lstm_backward = lstm_backward.add_input(rentry.vec)

                        entry.lstms[1] = lstm_forward.output()
                        rentry.lstms[0] = lstm_backward.output()

                    if self.bibiFlag:
                        for entry in conll_sentence:
                            entry.vec = concatenate(entry.lstms)

                        blstm_forward = self.bbuilders[0].initial_state()
                        blstm_backward = self.bbuilders[1].initial_state()

                        for entry, rentry in zip(conll_sentence, reversed(conll_sentence)):
                            blstm_forward = blstm_forward.add_input(entry.vec)
                            blstm_backward = blstm_backward.add_input(rentry.vec)

                            entry.lstms[1] = blstm_forward.output()
                            rentry.lstms[0] = blstm_backward.output()

                scores, exprs = self.__evaluate(conll_sentence)
                gold = [entry.parent_id for entry in conll_sentence]
                heads = decoder.parse_proj(scores, gold if self.costaugFlag else None)
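                # __evaluate yields an arc-score matrix; decoder.parse_proj returns
                # the best projective tree. Passing the gold heads switches on
                # cost-augmented decoding, which biases the decoder toward
                # margin-violating trees, exactly what the hinge loss below
                # (exprs[h][i] - exprs[g][i] for each wrongly attached token) needs.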

                if self.labelsFlag:

                    concat_layer = [dynet.dropout(self.__getRelVector(conll_sentence, head, modifier + 1), 0.33) for
                                    modifier, head in enumerate(gold[1:])]
                    outputFFlayer = self.ffRelPredictor.predict_sequence(concat_layer)
                    relIDs = [self.rels[conll_sentence[modifier + 1].relation] for modifier, _ in enumerate(gold[1:])]
                    for pred, goldid in zip(outputFFlayer, relIDs):
                        lerrs.append(self.pick_neg_log(pred, goldid))

                e = sum([1 for h, g in zip(heads[1:], gold[1:]) if h != g])
                eerrors += e
                if e > 0:
                    loss = [(exprs[h][i] - exprs[g][i]) for i, (h, g) in enumerate(zip(heads, gold)) if h != g]  # * (1.0/float(e))
                    eloss += (e)
                    mloss += (e)
                    errs.extend(loss)

                etotal += len(conll_sentence)

                if iSentence % 1 == 0:
                    if len(errs) > 0 or len(lerrs) > 0 or len(posErrs) > 0 or len(segErrs) > 0 or len(mTagErrs) > 0:
                        eerrs = (esum(errs + lerrs + posErrs + segErrs + mTagErrs))
                        eerrs.scalar_value()
                        eerrs.backward()
                        self.trainer.update()
                        errs = []
                        lerrs = []
                        posErrs = []
                        segErrs = []
                        mTagErrs = []

                    renew_cg()

        print("Loss: %.4f" % (mloss / iSentence))
Example no. 5
class jNeRE:
    def __init__(self, vocab, ner, rels, w2i, c2i, options):
        self.model = ParameterCollection()
        random.seed(1)
        self.trainer = AdamTrainer(self.model)
        if options.learning_rate is not None:
            self.trainer = AdamTrainer(self.model, alpha=options.learning_rate)
            print("Adam initial learning rate:", options.learning_rate)
        self.activations = {
            'tanh': tanh,
            'sigmoid': logistic,
            'relu': rectify,
            'tanh3': (lambda x: tanh(cwise_multiply(cwise_multiply(x, x), x)))
        }
        self.activation = self.activations[options.activation]

        self.blstmFlag = options.blstmFlag
        self.labelsFlag = options.labelsFlag
        self.costaugFlag = options.costaugFlag
        self.bibiFlag = options.bibiFlag

        self.ldims = options.lstm_dims
        self.wdims = options.wembedding_dims
        self.cdims = options.cembedding_dims
        self.layers = options.lstm_layers
        self.wordsCount = vocab
        self.vocab = {word: ind + 3 for word, ind in w2i.iteritems()}
        self.ner = {word: ind for ind, word in enumerate(ner)}
        self.id2ner = {ind: word for ind, word in enumerate(ner)}
        self.c2i = c2i
        self.rels = {word: ind for ind, word in enumerate(rels)}
        self.id2rels = rels
        # print self.rels
        # print self.id2rels
        self.nerdims = options.nembedding_dims
        self.mixture_weight = options.mixture_weight
        #self.posCount = postagCount

        #self.pos2id = {word: ind + 1 for ind, word in enumerate(postagCount.keys())}
        #self.pdims = options.pembedding_dims

        self.vocab['*PAD*'] = 1
        self.vocab['*INITIAL*'] = 2
        self.wlookup = self.model.add_lookup_parameters(
            (len(vocab) + 3, self.wdims))
        self.clookup = self.model.add_lookup_parameters((len(c2i), self.cdims))
        self.nerlookup = self.model.add_lookup_parameters(
            (len(ner), self.nerdims))
        #self.plookup = self.model.add_lookup_parameters((len(postagCount.keys()) + 1, self.pdims))

        if options.external_embedding is not None:
            ext_embeddings, ext_emb_dim = load_embeddings_file(
                options.external_embedding, lower=True)
            assert (ext_emb_dim == self.wdims)
            print("Initializing word embeddings by pre-trained vectors")
            count = 0
            for word in self.vocab:
                _word = unicode(word, "utf-8")
                if _word in ext_embeddings:
                    count += 1
                    self.wlookup.init_row(self.vocab[word],
                                          ext_embeddings[_word])
            print("Vocab size: %d; #words having pretrained vectors: %d" %
                  (len(self.vocab), count))

        self.ner_builders = [
            VanillaLSTMBuilder(1, self.wdims + self.cdims * 2, self.ldims,
                               self.model),
            VanillaLSTMBuilder(1, self.wdims + self.cdims * 2, self.ldims,
                               self.model)
        ]
        self.ner_bbuilders = [
            VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model),
            VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model)
        ]

        if self.bibiFlag:
            self.builders = [
                VanillaLSTMBuilder(1,
                                   self.wdims + self.cdims * 2 + self.nerdims,
                                   self.ldims, self.model),
                VanillaLSTMBuilder(1,
                                   self.wdims + self.cdims * 2 + self.nerdims,
                                   self.ldims, self.model)
            ]
            self.bbuilders = [
                VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model),
                VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model)
            ]
        elif self.layers > 0:
            self.builders = [
                VanillaLSTMBuilder(self.layers,
                                   self.wdims + self.cdims * 2 + self.nerdims,
                                   self.ldims, self.model),
                VanillaLSTMBuilder(self.layers,
                                   self.wdims + self.cdims * 2 + self.nerdims,
                                   self.ldims, self.model)
            ]
        else:
            self.builders = [
                SimpleRNNBuilder(1, self.wdims + self.cdims * 2 + self.nerdims,
                                 self.ldims, self.model),
                SimpleRNNBuilder(1, self.wdims + self.cdims * 2 + self.nerdims,
                                 self.ldims, self.model)
            ]

        # self.ffSeqPredictor = FFSequencePredictor(Layer(self.model, self.ldims * 2, len(self.ner), softmax))

        self.hidden_units = options.hidden_units

        self.char_rnn = RNNSequencePredictor(
            LSTMBuilder(1, self.cdims, self.cdims, self.model))

        self.crf_module = CRF(self.model, self.id2ner)

        self.tanh_layer_W = self.model.add_parameters(
            (self.hidden_units, 2 * self.ldims))
        self.tanh_layer_b = self.model.add_parameters((self.hidden_units))

        self.last_layer_W = self.model.add_parameters(
            (len(self.ner), self.hidden_units))
        self.last_layer_b = self.model.add_parameters((len(self.ner)))

        W = orthonormal_initializer(self.hidden_units, 2 * self.ldims)

        self.head_layer_W = self.model.parameters_from_numpy(W)
        self.head_layer_b = self.model.add_parameters(
            (self.hidden_units, ), init=dynet.ConstInitializer(0.))

        self.dep_layer_W = self.model.parameters_from_numpy(W)
        self.dep_layer_b = self.model.add_parameters(
            (self.hidden_units, ), init=dynet.ConstInitializer(0.))

        self.rel_U = self.model.add_parameters(
            (len(self.rels) * self.hidden_units, self.hidden_units),
            init=dynet.ConstInitializer(0.))

        self.rel_W = self.model.parameters_from_numpy(
            orthonormal_initializer(len(self.rels), 2 * self.hidden_units))
        #self.rel_W = self.model.add_parameters((len(self.rels), self.hidden_units * 2))
        self.rel_b = self.model.add_parameters((len(self.rels), ),
                                               init=dynet.ConstInitializer(0.))
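
        # Biaffine relation scorer in the style of Dozat & Manning: for a head
        # representation h and a dependent representation d,
        #
        #     score(h, d) = h^T U d + W [h; d] + b
        #
        # rel_U stacks one (hidden x hidden) matrix per relation into a
        # (|rels| * hidden, hidden) block, which is why Train/Predict reshape its
        # product with d back to (hidden, |rels|). The head/dep projections share
        # one orthonormal initialization W.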

    def pick_neg_log(self, pred, gold):
        return -dynet.log(dynet.pick(pred, gold))

    def Save(self, filename):
        self.model.save(filename)

    def Load(self, filename):
        self.model.populate(filename)

    def Predict(self, test_data):
        # with open(conll_path, 'r') as conllFP:
        outputPredNER = {}
        id2arg2rel = {}
        outputPredRel = {}
        count = 0.0
        nercount = 0.0
        for sentenceID in test_data:
            sentence = test_data[sentenceID]

            for entry in sentence:
                wordvec = self.wlookup[int(self.vocab.get(
                    entry.norm, 0))] if self.wdims > 0 else None
                last_state = self.char_rnn.predict_sequence(
                    [self.clookup[c] for c in entry.idChars])[-1]
                rev_last_state = self.char_rnn.predict_sequence(
                    [self.clookup[c] for c in reversed(entry.idChars)])[-1]

                entry.vec = concatenate(
                    filter(None, [wordvec, last_state, rev_last_state]))

                entry.ner_lstms = [entry.vec, entry.vec]
                entry.headfov = None
                entry.modfov = None

                entry.rheadfov = None
                entry.rmodfov = None

            # Predicted ner tags
            lstm_forward = self.ner_builders[0].initial_state()
            lstm_backward = self.ner_builders[1].initial_state()
            for entry, rentry in zip(sentence, reversed(sentence)):
                lstm_forward = lstm_forward.add_input(entry.vec)
                lstm_backward = lstm_backward.add_input(rentry.vec)

                entry.ner_lstms[1] = lstm_forward.output()
                rentry.ner_lstms[0] = lstm_backward.output()

            for entry in sentence:
                entry.ner_vec = concatenate(entry.ner_lstms)

            blstm_forward = self.ner_bbuilders[0].initial_state()
            blstm_backward = self.ner_bbuilders[1].initial_state()

            for entry, rentry in zip(sentence, reversed(sentence)):
                blstm_forward = blstm_forward.add_input(entry.ner_vec)
                blstm_backward = blstm_backward.add_input(rentry.ner_vec)
                entry.ner_lstms[1] = blstm_forward.output()
                rentry.ner_lstms[0] = blstm_backward.output()

            concat_layer = [concatenate(entry.ner_lstms) for entry in sentence]

            context_representations = [dynet.tanh(dynet.affine_transform([self.tanh_layer_b.expr(),
                                                                          self.tanh_layer_W.expr(),
                                                                          context])) \
                                       for context in concat_layer]

            tag_scores = [dynet.affine_transform([self.last_layer_b.expr(),
                                                  self.last_layer_W.expr(),
                                                  context]) \
                          for context in context_representations]

            observations = [
                dynet.concatenate([obs, dynet.inputVector([-1e10, -1e10])],
                                  d=0) for obs in tag_scores
            ]
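            # The two extra -1e10 logits appended to each observation reserve the
            # CRF's start/stop tag slots; the large negative scores keep those
            # states from being emitted for real tokens during Viterbi decoding.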

            predicted_ner_indices, _ = self.crf_module.viterbi_decoding(
                observations)

            predicted_nertags = [
                self.id2ner[idx] for idx in predicted_ner_indices
            ]

            outputPredNER[sentenceID] = predicted_nertags

            # Add ner embeddings
            for entry, ner in zip(sentence, predicted_ner_indices):
                entry.vec = concatenate([entry.vec, self.nerlookup[ner]])
                entry.lstms = [entry.vec, entry.vec]

            # Relation losses
            if self.blstmFlag:
                lstm_forward = self.builders[0].initial_state()
                lstm_backward = self.builders[1].initial_state()

                for entry, rentry in zip(sentence, reversed(sentence)):
                    lstm_forward = lstm_forward.add_input(entry.vec)
                    lstm_backward = lstm_backward.add_input(rentry.vec)

                    entry.lstms[1] = lstm_forward.output()
                    rentry.lstms[0] = lstm_backward.output()

                if self.bibiFlag:
                    for entry in sentence:
                        entry.vec = concatenate(entry.lstms)

                    blstm_forward = self.bbuilders[0].initial_state()
                    blstm_backward = self.bbuilders[1].initial_state()

                    for entry, rentry in zip(sentence, reversed(sentence)):
                        blstm_forward = blstm_forward.add_input(entry.vec)
                        blstm_backward = blstm_backward.add_input(rentry.vec)

                        entry.lstms[1] = blstm_forward.output()
                        rentry.lstms[0] = blstm_backward.output()

            concat_layer = [concatenate(entry.lstms) for entry in sentence]

            head_context_representations = [dynet.tanh(dynet.affine_transform([self.head_layer_b.expr(),
                                                                               self.head_layer_W.expr(),
                                                                               context])) \
                                            for context in concat_layer]

            dep_context_representations = [dynet.tanh(dynet.affine_transform([self.dep_layer_b.expr(),
                                                                              self.dep_layer_W.expr(),
                                                                              context])) \
                                           for context in concat_layer]

            nerids = []
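            # Under the BILOU scheme, tags beginning with L- (Last) or U- (Unit)
            # mark an entity's final token, so only those indices can anchor a
            # relation. The type checks below encode the dataset's admissible
            # argument pairs, and every candidate pair starts out as "NEG".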
            for ind, tag in enumerate(predicted_nertags):
                # print(ind, tag)
                if str(tag).startswith("L-") or str(tag).startswith("U-"):
                    if not str(tag).endswith("-Other"):
                        nerids.append(ind)

            id2arg2rel[sentenceID] = {}
            for ind1 in nerids:
                for ind2 in nerids:
                    if ind1 != ind2:
                        if predicted_nertags[ind1] in [
                                "L-Peop", "U-Peop"
                        ] and predicted_nertags[ind2] in [
                                "L-Peop", "U-Peop", "L-Org", "U-Org", "L-Loc",
                                "U-Loc"
                        ]:
                            id2arg2rel[sentenceID][(ind1, ind2)] = "NEG"
                        if predicted_nertags[ind1] in [
                                "L-Loc", "U-Loc", "L-Org", "U-Org"
                        ] and predicted_nertags[ind2] in ["L-Loc", "U-Loc"]:
                            id2arg2rel[sentenceID][(ind1, ind2)] = "NEG"
                        # id2arg2rel[sentenceID][(ind1, ind2)] = "NEG"

            for (head, dep) in id2arg2rel[sentenceID]:
                # print (head, dep), pairrels[(head, dep)]
                linear = self.rel_U.expr() * dep_context_representations[dep]
                linear = dynet.reshape(linear,
                                       (self.hidden_units, len(self.rels)))
                bilinear = dynet.transpose(
                    head_context_representations[head]) * linear
                biaffine = dynet.transpose(
                    bilinear) + self.rel_W.expr() * concatenate([
                        head_context_representations[head],
                        dep_context_representations[dep]
                    ]) + self.rel_b.expr()
                id2arg2rel[sentenceID][(head, dep)] = self.id2rels[np.argmax(
                    softmax(biaffine).value())]

            outputPredRel[sentenceID] = {}
            for (head, dep) in id2arg2rel[sentenceID]:
                rel = id2arg2rel[sentenceID][(head, dep)]
                if rel != "NEG":
                    outputPredRel[sentenceID][(head, dep)] = rel
                # else:
                #    i_rel = id2arg2rel[sentenceID][(dep, head)]
                #    if str(i_rel).endswith("-1"):
                #        outputPredRel[sentenceID][(head, dep)] = i_rel[:-2]

            renew_cg()
            # print "----"
            # print outputPredNER[sentenceID]
            # print id2arg2rel[sentenceID]
            # print outputPredRel[sentenceID]

        return outputPredNER, outputPredRel

    def Train(self, train_data, train_id2nerBILOU, id2arg2rel, isTrain=True):
        eloss = 0.0
        mloss = 0.0
        eerrors = 0
        etotal = 0
        start = time.time()
        nwtotal = 0

        if isTrain:
            shuffledData = list(train_data.keys())
            random.shuffle(shuffledData)

            # errs = []
            lerrs = []
            nerErrs = []

            for iSentence, sentenceId in enumerate(shuffledData):
                if iSentence % 100 == 0 and iSentence != 0:
                    print "Processing sentence number: %d" % iSentence, ", Loss: %.4f" % (
                        eloss / etotal), ", Time: %.2f" % (time.time() - start)
                    start = time.time()
                    eerrors = 0
                    eloss = 0.0
                    etotal = 0

                sentence = train_data[sentenceId]
                goldNers = train_id2nerBILOU[sentenceId].strip().split()

                for entry in sentence:
                    c = float(self.wordsCount.get(entry.norm, 0))
                    dropFlag = (random.random() < (c / (0.25 + c)))
                    wordvec = self.wlookup[
                        int(self.vocab.get(entry.norm, 0)
                            ) if dropFlag else 0] if self.wdims > 0 else None

                    last_state = self.char_rnn.predict_sequence(
                        [self.clookup[c] for c in entry.idChars])[-1]
                    rev_last_state = self.char_rnn.predict_sequence(
                        [self.clookup[c] for c in reversed(entry.idChars)])[-1]

                    entry.vec = dynet.dropout(
                        concatenate(
                            filter(None,
                                   [wordvec, last_state, rev_last_state])),
                        0.33)

                    entry.ner_lstms = [entry.vec, entry.vec]
                    entry.headfov = None
                    entry.modfov = None

                    entry.rheadfov = None
                    entry.rmodfov = None

                # ner tagging loss
                lstm_forward = self.ner_builders[0].initial_state()
                lstm_backward = self.ner_builders[1].initial_state()
                for entry, rentry in zip(sentence, reversed(sentence)):
                    lstm_forward = lstm_forward.add_input(entry.vec)
                    lstm_backward = lstm_backward.add_input(rentry.vec)

                    entry.ner_lstms[1] = lstm_forward.output()
                    rentry.ner_lstms[0] = lstm_backward.output()

                for entry in sentence:
                    entry.ner_vec = concatenate(entry.ner_lstms)

                blstm_forward = self.ner_bbuilders[0].initial_state()
                blstm_backward = self.ner_bbuilders[1].initial_state()

                for entry, rentry in zip(sentence, reversed(sentence)):
                    blstm_forward = blstm_forward.add_input(entry.ner_vec)
                    blstm_backward = blstm_backward.add_input(rentry.ner_vec)
                    entry.ner_lstms[1] = blstm_forward.output()
                    rentry.ner_lstms[0] = blstm_backward.output()

                concat_layer = [
                    dynet.dropout(concatenate(entry.ner_lstms), 0.33)
                    for entry in sentence
                ]

                context_representations = [dynet.tanh(dynet.affine_transform([self.tanh_layer_b.expr(),
                                                                              self.tanh_layer_W.expr(),
                                                                              context])) \
                                           for context in concat_layer]

                tag_scores = [dynet.affine_transform([self.last_layer_b.expr(),
                                                      self.last_layer_W.expr(),
                                                      context]) \
                              for context in context_representations]

                nerIDs = [self.ner.get(tag) for tag in goldNers]

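                # Linear-chain CRF loss: neg_log_loss is the negative log-likelihood
                # of the gold BILOU path, i.e. log-sum-exp over all tag paths minus
                # the gold path's score.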
                loss = self.crf_module.neg_log_loss(tag_scores, nerIDs)
                # loss, _ = self.crf_module.viterbi_loss(tag_scores, nerIDs)

                nerErrs.append(loss)

                # observations = [dynet.concatenate([obs, dynet.inputVector([-1e10, -1e10])], d=0) for obs in
                #                tag_scores]
                # predicted_ner_indices, _ = self.crf_module.viterbi_decoding(observations)

                # Add ner embeddings
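                # Unlike Predict, training feeds the *gold* NER ids through
                # nerlookup (teacher forcing); the commented-out Viterbi block above
                # shows the predicted-tag alternative.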
                for entry, ner in zip(sentence, nerIDs):
                    entry.vec = concatenate(
                        [entry.vec,
                         dynet.dropout(self.nerlookup[ner], 0.33)])
                    entry.lstms = [entry.vec, entry.vec]

                # Relation losses
                if self.blstmFlag:
                    lstm_forward = self.builders[0].initial_state()
                    lstm_backward = self.builders[1].initial_state()

                    for entry, rentry in zip(sentence, reversed(sentence)):
                        lstm_forward = lstm_forward.add_input(entry.vec)
                        lstm_backward = lstm_backward.add_input(rentry.vec)

                        entry.lstms[1] = lstm_forward.output()
                        rentry.lstms[0] = lstm_backward.output()

                    if self.bibiFlag:
                        for entry in sentence:
                            entry.vec = concatenate(entry.lstms)

                        blstm_forward = self.bbuilders[0].initial_state()
                        blstm_backward = self.bbuilders[1].initial_state()

                        for entry, rentry in zip(sentence, reversed(sentence)):
                            blstm_forward = blstm_forward.add_input(entry.vec)
                            blstm_backward = blstm_backward.add_input(
                                rentry.vec)

                            entry.lstms[1] = blstm_forward.output()
                            rentry.lstms[0] = blstm_backward.output()

                concat_layer = [
                    dynet.dropout(concatenate(entry.lstms), 0.33)
                    for entry in sentence
                ]

                head_context_representations = [dynet.tanh(dynet.affine_transform([self.head_layer_b.expr(),
                                                                                   self.head_layer_W.expr(),
                                                                                   context])) \
                                                for context in concat_layer]

                dep_context_representations = [dynet.tanh(dynet.affine_transform([self.dep_layer_b.expr(),
                                                                                  self.dep_layer_W.expr(),
                                                                                  context])) \
                                               for context in concat_layer]

                pairrels = id2arg2rel[sentenceId]
                for (head, dep) in pairrels:
                    # print (head, dep), pairrels[(head, dep)]
                    linear = self.rel_U.expr() * dep_context_representations[dep]
                    linear = dynet.reshape(linear,
                                           (self.hidden_units, len(self.rels)))
                    bilinear = dynet.transpose(
                        head_context_representations[head]) * linear
                    biaffine = dynet.transpose(
                        bilinear) + self.rel_W.expr() * concatenate([
                            head_context_representations[head],
                            dep_context_representations[dep]
                        ]) + self.rel_b.expr()
                    lerrs.append(
                        self.pick_neg_log(softmax(biaffine),
                                          self.rels.get(pairrels[(head, dep)])))

                etotal += len(sentence)
                nwtotal += len(sentence)

                if iSentence % 1 == 0:
                    if len(lerrs) > 0 or len(nerErrs) > 0:
                        # if len(nerErrs) > 0:
                        eerrs = esum(nerErrs + lerrs)
                        eerrs.scalar_value()
                        eloss += eerrs.scalar_value()
                        mloss += eloss
                        eerrs.backward()
                        self.trainer.update()
                        # errs = []
                        lerrs = []
                        nerErrs = []

                    renew_cg()

        print "Loss: %.4f" % (mloss / nwtotal)
Example no. 6
class Learner:
    def __init__(self, c2i, options):
        self.model = dy.ParameterCollection()
        random.seed(1)
        self.trainer = dy.AdamTrainer(self.model)

        self.dropout_rate = options.dropout_rate
        self.ldims = options.lstm_dims
        self.cdims = options.cembedding_dims

        self.c2i = c2i

        self.W_d = self.model.add_parameters((self.ldims, 2 * self.ldims))
        self.W_db = self.model.add_parameters(self.ldims)

        self.clookup = self.model.add_lookup_parameters((len(c2i), self.cdims))

        self.word_encoder = RNNSequencePredictor(
            dy.VanillaLSTMBuilder(1, self.cdims, self.ldims, self.model))
        self.context_encoder = [
            dy.VanillaLSTMBuilder(1, self.ldims, self.ldims, self.model),
            dy.VanillaLSTMBuilder(1, self.ldims, self.ldims, self.model)
        ]
        self.output_encoder = dy.VanillaLSTMBuilder(1, self.cdims, self.ldims,
                                                    self.model)

        self.decoder = dy.VanillaLSTMBuilder(2, self.cdims, self.ldims,
                                             self.model)

        self.W_s = self.model.add_parameters((len(self.c2i), self.ldims))
        self.W_sb = self.model.add_parameters((len(self.c2i)))
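
        # Architecture in brief: a char-level LSTM encodes each word (word_encoder),
        # a BiLSTM contextualizes the word encodings (context_encoder), and a
        # two-layer LSTM decoder emits an output character sequence per word, its
        # state seeded from the context and word encodings via set_s.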

    def save(self, filename):
        self.model.save(filename)

    def load(self, filename):
        self.model.populate(filename)

    def predict(self, conll_path):
        with open(conll_path, 'r') as conllFP:
            for iSentence, sentence in enumerate(
                    read_conll(
                        conllFP,
                        self.c2i,
                    )):
                dy.renew_cg()

                conll_sentence = [
                    entry for entry in sentence
                    if isinstance(entry, utils.ConllEntry)
                ]
                # I- Word encoding
                for entry in conll_sentence:
                    c_embeddings = []
                    for c in entry.idChars:
                        # TODO : try different formulas like alpha/(alpha + #(w))
                        dropFlag = False  # random.random() < self.dropout_rate
                        c_embedding = self.clookup[c if not dropFlag else 0]
                        c_embeddings.append(c_embedding)

                    e_i = self.word_encoder.predict_sequence(c_embeddings)[-1]
                    entry.word_enc = dy.dropout(e_i, self.dropout_rate)
                    entry.context_lstms = [entry.word_enc, entry.word_enc]

                # II- Context encoding
                blstm_forward = self.context_encoder[0].initial_state()
                blstm_backward = self.context_encoder[1].initial_state()
                for entry, rentry in zip(conll_sentence,
                                         reversed(conll_sentence)):
                    blstm_forward = blstm_forward.add_input(entry.word_enc)
                    blstm_backward = blstm_backward.add_input(rentry.word_enc)

                    entry.context_lstms[1] = blstm_forward.output()
                    rentry.context_lstms[0] = blstm_backward.output()

                for entry in conll_sentence:
                    entry.context_enc = dy.concatenate(entry.context_lstms)
                """
                # III- Output encoding
                c_embeddings = []
                for entry in conll_sentence:
                    for f in entry.idFeats:
                        # TODO : try different formulas like alpha/(alpha + #(w))
                        dropFlag = False  # random.random() < self.dropout_rate
                        c_embedding = self.clookup[f if not dropFlag else 0]
                        c_embeddings.append(c_embedding)
                t_i = self.output_encoder.predict_sequence(c_embeddings)

                for entry, vec in zip(conll_sentence, t_i):
                    entry.output_enc = dy.dropout(vec, self.dropout_rate)
                """
                # IV- Decoder
                # Init for Context encoding
                for entry in conll_sentence:
                    entry.context_enc = dy.rectify(self.W_d.expr() *
                                                   entry.context_enc +
                                                   self.W_db.expr())
                """
                # Init for Word and Output encoding
                for entry in conll_sentence:
                    entry.comb = entry.word_enc + entry.output_enc
                """

                # predicted_sequences = []
                output_state = self.output_encoder.initial_state()
                for entry in conll_sentence:
                    if output_state.output():
                        entry.comb = entry.word_enc + output_state.output()
                    else:
                        entry.comb = entry.word_enc

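                    # Seed the two-layer decoder. set_s expects the full state (cell
                    # vectors for every layer, then hidden vectors), so this reads as
                    # [c1, c2, h1, h2] = [context_enc, tanh(context_enc), comb,
                    # tanh(comb)].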
                    decoder_state = self.decoder.initial_state().set_s([
                        entry.context_enc,
                        dy.tanh(entry.context_enc), entry.comb,
                        dy.tanh(entry.comb)
                    ])
                    predicted_sequence = []
                    predicted_char = self.c2i["<s>"]
                    counter = 0
                    while True:
                        counter += 1
                        decoder_state = decoder_state.add_input(self.clookup[predicted_char])
                        probs = self._get_probs(decoder_state.output())
                        predicted_char = probs.npvalue().argmax()
                        if predicted_char != self.c2i["</s>"] and counter < 50:
                            predicted_sequence.append(predicted_char)
                        else:
                            break
                    # Feed the word's predicted sequence into the output encoder so
                    # that the next word's comb (see above) can condition on it.
                    for seq_i in predicted_sequence:
                        tag_embedding = self.clookup[seq_i]
                        output_state = output_state.add_input(tag_embedding)
                    entry.predicted_sequence = predicted_sequence
                    # predicted_sequences.append(predicted_sequence)

                yield conll_sentence

    def _get_probs(self, rnn_output):
        output_w = dy.parameter(self.W_s)
        output_b = dy.parameter(self.W_sb)

        probs = dy.softmax(output_w * rnn_output + output_b)
        return probs

    def train(self, conll_path):
        total = 0.0
        with open(conll_path, 'r') as conllFP:
            shuffledData = list(read_conll(conllFP, self.c2i))
            random.shuffle(shuffledData)

            for iSentence, sentence in enumerate(shuffledData):
                conll_sentence = [
                    entry for entry in sentence
                    if isinstance(entry, utils.ConllEntry)
                ]

                # I- Word encoding
                for entry in conll_sentence:
                    c_embeddings = []
                    for c in entry.idChars:
                        # TODO : try different formulas like alpha/(alpha + #(w))
                        dropFlag = False  # random.random() < self.dropout_rate
                        c_embedding = self.clookup[c if not dropFlag else 0]
                        c_embeddings.append(c_embedding)

                    e_i = self.word_encoder.predict_sequence(c_embeddings)[-1]
                    entry.word_enc = dy.dropout(e_i, self.dropout_rate)
                    entry.context_lstms = [entry.word_enc, entry.word_enc]

                # II- Context encoding
                blstm_forward = self.context_encoder[0].initial_state()
                blstm_backward = self.context_encoder[1].initial_state()
                for entry, rentry in zip(conll_sentence,
                                         reversed(conll_sentence)):
                    blstm_forward = blstm_forward.add_input(entry.word_enc)
                    blstm_backward = blstm_backward.add_input(rentry.word_enc)

                    entry.context_lstms[1] = blstm_forward.output()
                    rentry.context_lstms[0] = blstm_backward.output()

                for entry in conll_sentence:
                    entry.context_enc = dy.concatenate(entry.context_lstms)

                # III- Output encoding
                c_embeddings = []
                for entry in conll_sentence:
                    for f in entry.idFeats:
                        # TODO : try different formulas like alpha/(alpha + #(w))
                        dropFlag = False  # random.random() < self.dropout_rate
                        c_embedding = self.clookup[f if not dropFlag else 0]
                        c_embeddings.append(c_embedding)
                output_encoder = RNNSequencePredictor(self.output_encoder)
                t_i = output_encoder.predict_sequence(c_embeddings)

                for entry, vec in zip(conll_sentence, t_i):
                    entry.output_enc = dy.dropout(vec, self.dropout_rate)

                # IV- Decoder
                # Init for Context encoding
                for entry in conll_sentence:
                    entry.context_enc = dy.rectify(self.W_d.expr() *
                                                   entry.context_enc +
                                                   self.W_db.expr())

                # Init for Word and Output encoding
                for entry in conll_sentence:
                    entry.comb = entry.word_enc + entry.output_enc

                losses = []

                for entry in conll_sentence:
                    probs = []  # reset per entry so predictions align with decoder_input
                    decoder_state = self.decoder.initial_state().set_s([
                        entry.context_enc,
                        dy.tanh(entry.context_enc), entry.comb,
                        dy.tanh(entry.comb)
                    ])

                    for g_c in entry.decoder_input:
                        decoder_state = decoder_state.add_input(
                            self.clookup[g_c])
                        p = self._get_probs(decoder_state.output())
                        probs.append(p)

                    losses += [
                        -dy.log(dy.pick(p, o))
                        for p, o in zip(probs, entry.decoder_input)
                    ]

                total_losses = dy.esum(losses)
                cur_loss = total_losses.scalar_value()
                total += cur_loss
                total_losses.backward()
                self.trainer.update()
                if iSentence != 0 and iSentence % 500 == 0:
                    print("Sentence:" + str(iSentence) + " Loss:" +
                          str(total / (iSentence + 1)))
                dy.renew_cg()