def __init__(self, c2i, options):
    self.model = dy.ParameterCollection()
    random.seed(1)
    self.trainer = dy.AdamTrainer(self.model)

    self.dropout_rate = options.dropout_rate
    self.ldims = options.lstm_dims
    self.cdims = options.cembedding_dims
    self.c2i = c2i

    self.W_d = self.model.add_parameters((self.ldims, 2 * self.ldims))
    self.W_db = self.model.add_parameters(self.ldims)

    self.clookup = self.model.add_lookup_parameters((len(c2i), self.cdims))

    # Character-level word encoder and forward/backward context encoders
    self.word_encoder = RNNSequencePredictor(dy.VanillaLSTMBuilder(1, self.cdims, self.ldims, self.model))
    self.context_encoder = [dy.VanillaLSTMBuilder(1, self.ldims, self.ldims, self.model),
                            dy.VanillaLSTMBuilder(1, self.ldims, self.ldims, self.model)]
    self.output_encoder = dy.VanillaLSTMBuilder(1, self.cdims, self.ldims, self.model)

    # Two-layer character decoder with a softmax output layer over the character vocabulary
    self.decoder = dy.VanillaLSTMBuilder(2, self.cdims, self.ldims, self.model)
    self.W_s = self.model.add_parameters((len(self.c2i), self.ldims))
    self.W_sb = self.model.add_parameters((len(self.c2i)))
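# A minimal sketch of how the parameters above compose for a single word:
# encode its characters with word_encoder, then score a decoder state against
# the character vocabulary via W_s / W_sb. This assumes, as the later uses of
# RNNSequencePredictor.predict_sequence in this file suggest, that it returns
# one output expression per input; the helper names are illustrative only.

def encode_word_sketch(model, char_ids):
    # Embed the characters and run the character LSTM; the last state is a
    # fixed-size encoding of the word (the same pattern char_rnn uses below).
    states = model.word_encoder.predict_sequence([model.clookup[c] for c in char_ids])
    return states[-1]

def char_distribution_sketch(model, dec_state):
    # Distribution over the character vocabulary for one decoder state.
    return dy.softmax(model.W_s.expr() * dec_state + model.W_sb.expr())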
class jPosDepLearner:
    def __init__(self, vocab, pos, rels, w2i, c2i, options):
        self.model = ParameterCollection()
        random.seed(1)
        self.trainer = AdamTrainer(self.model)
        #if options.learning_rate is not None:
        #    self.trainer = AdamTrainer(self.model, alpha=options.learning_rate)
        #    print("Adam initial learning rate:", options.learning_rate)
        self.activations = {'tanh': tanh, 'sigmoid': logistic, 'relu': rectify,
                            'tanh3': (lambda x: tanh(cwise_multiply(cwise_multiply(x, x), x)))}
        self.activation = self.activations[options.activation]

        self.blstmFlag = options.blstmFlag
        self.labelsFlag = options.labelsFlag
        self.costaugFlag = options.costaugFlag
        self.bibiFlag = options.bibiFlag

        self.ldims = options.lstm_dims
        self.wdims = options.wembedding_dims
        self.cdims = options.cembedding_dims
        self.layers = options.lstm_layers
        self.wordsCount = vocab
        self.vocab = {word: ind + 3 for word, ind in w2i.iteritems()}
        self.pos = {word: ind for ind, word in enumerate(pos)}
        self.id2pos = {ind: word for ind, word in enumerate(pos)}
        self.c2i = c2i
        self.rels = {word: ind for ind, word in enumerate(rels)}
        self.irels = rels
        self.pdims = options.pembedding_dims

        self.vocab['*PAD*'] = 1
        self.vocab['*INITIAL*'] = 2

        self.wlookup = self.model.add_lookup_parameters((len(vocab) + 3, self.wdims))
        self.clookup = self.model.add_lookup_parameters((len(c2i), self.cdims))
        self.plookup = self.model.add_lookup_parameters((len(pos), self.pdims))

        if options.external_embedding is not None:
            ext_embeddings, ext_emb_dim = load_embeddings_file(options.external_embedding, lower=True)
            assert (ext_emb_dim == self.wdims)
            print("Initializing word embeddings by pre-trained vectors")
            count = 0
            for word in self.vocab:
                _word = unicode(word, "utf-8")
                if _word in ext_embeddings:
                    count += 1
                    self.wlookup.init_row(self.vocab[word], ext_embeddings[_word])
            print("Vocab size: %d; #words having pretrained vectors: %d" % (len(self.vocab), count))

        # Tagger BiLSTMs (first and second layer)
        self.pos_builders = [VanillaLSTMBuilder(1, self.wdims + self.cdims * 2, self.ldims, self.model),
                             VanillaLSTMBuilder(1, self.wdims + self.cdims * 2, self.ldims, self.model)]
        self.pos_bbuilders = [VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model),
                              VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model)]

        # Parser BiLSTMs
        if self.bibiFlag:
            self.builders = [VanillaLSTMBuilder(1, self.wdims + self.cdims * 2 + self.pdims, self.ldims, self.model),
                             VanillaLSTMBuilder(1, self.wdims + self.cdims * 2 + self.pdims, self.ldims, self.model)]
            self.bbuilders = [VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model),
                              VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model)]
        elif self.layers > 0:
            self.builders = [VanillaLSTMBuilder(self.layers, self.wdims + self.cdims * 2 + self.pdims, self.ldims, self.model),
                             VanillaLSTMBuilder(self.layers, self.wdims + self.cdims * 2 + self.pdims, self.ldims, self.model)]
        else:
            self.builders = [SimpleRNNBuilder(1, self.wdims + self.cdims * 2, self.ldims, self.model),
                             SimpleRNNBuilder(1, self.wdims + self.cdims * 2, self.ldims, self.model)]

        self.ffSeqPredictor = FFSequencePredictor(Layer(self.model, self.ldims * 2, len(self.pos), softmax))

        self.hidden_units = options.hidden_units

        # Arc scorer
        self.hidBias = self.model.add_parameters((self.ldims * 8))
        self.hidLayer = self.model.add_parameters((self.hidden_units, self.ldims * 8))
        self.hid2Bias = self.model.add_parameters((self.hidden_units))
        self.outLayer = self.model.add_parameters((1, self.hidden_units if self.hidden_units > 0 else self.ldims * 8))

        # Relation labeler
        if self.labelsFlag:
            self.rhidBias = self.model.add_parameters((self.ldims * 8))
            self.rhidLayer = self.model.add_parameters((self.hidden_units, self.ldims * 8))
            self.rhid2Bias = self.model.add_parameters((self.hidden_units))
            self.routLayer = self.model.add_parameters(
                (len(self.irels), self.hidden_units if self.hidden_units > 0 else self.ldims * 8))
            self.routBias = self.model.add_parameters((len(self.irels)))
            self.ffRelPredictor = FFSequencePredictor(
                Layer(self.model, self.hidden_units if self.hidden_units > 0 else self.ldims * 8, len(self.irels), softmax))

        self.char_rnn = RNNSequencePredictor(LSTMBuilder(1, self.cdims, self.cdims, self.model))

    def __getExpr(self, sentence, i, j):
        if sentence[i].headfov is None:
            sentence[i].headfov = concatenate([sentence[i].lstms[0], sentence[i].lstms[1]])
        if sentence[j].modfov is None:
            sentence[j].modfov = concatenate([sentence[j].lstms[0], sentence[j].lstms[1]])

        _inputVector = concatenate(
            [sentence[i].headfov, sentence[j].modfov, dynet.abs(sentence[i].headfov - sentence[j].modfov),
             dynet.cmult(sentence[i].headfov, sentence[j].modfov)])

        if self.hidden_units > 0:
            output = self.outLayer.expr() * self.activation(
                self.hid2Bias.expr() + self.hidLayer.expr() * self.activation(_inputVector + self.hidBias.expr()))
        else:
            output = self.outLayer.expr() * self.activation(_inputVector + self.hidBias.expr())

        return output

    def __evaluate(self, sentence):
        exprs = [[self.__getExpr(sentence, i, j) for j in xrange(len(sentence))] for i in xrange(len(sentence))]
        scores = np.array([[output.scalar_value() for output in exprsRow] for exprsRow in exprs])
        return scores, exprs

    def pick_neg_log(self, pred, gold):
        return -dynet.log(dynet.pick(pred, gold))

    def __getRelVector(self, sentence, i, j):
        if sentence[i].rheadfov is None:
            sentence[i].rheadfov = concatenate([sentence[i].lstms[0], sentence[i].lstms[1]])
        if sentence[j].rmodfov is None:
            sentence[j].rmodfov = concatenate([sentence[j].lstms[0], sentence[j].lstms[1]])
        _outputVector = concatenate(
            [sentence[i].rheadfov, sentence[j].rmodfov, abs(sentence[i].rheadfov - sentence[j].rmodfov),
             cmult(sentence[i].rheadfov, sentence[j].rmodfov)])

        if self.hidden_units > 0:
            return self.rhid2Bias.expr() + self.rhidLayer.expr() * self.activation(_outputVector + self.rhidBias.expr())
        else:
            return _outputVector

    def Save(self, filename):
        self.model.save(filename)

    def Load(self, filename):
        self.model.populate(filename)

    def PredictFromText(self, text_file, output_file):
        #pdb.set_trace()
        #with open(output_file,'w',buffering=20*(1024**2)) as ofp:
        #    with open(text_file, 'r',buffering=20*(1024**2)) as t_fp:
        with open(output_file, 'w') as ofp:
            with open(text_file, 'r') as t_fp:
                try:
                    for line in t_fp:
                        # Round-trip each input line through temporary CoNLL files.
                        itf = tempfile.NamedTemporaryFile(delete=False)
                        otf = tempfile.NamedTemporaryFile(delete=False)
                        itf.write(line)
                        itf.flush()
                        conllConvertToFile(itf.name, otf.name)
                        conll_gen = self.Predict(otf.name)
                        for sentence in conll_gen:
                            for entry in sentence[1:]:
                                fields = str(entry).split('\t')
                                if len(fields) > 1:
                                    ofp.write(fields[0] + '\t' + fields[1] + '\t' + fields[3] + '\t' +
                                              fields[6] + '\t' + fields[7] + '\n')
                                else:
                                    ofp.write(str(entry) + '\n')
                            ofp.write('\n')
                        os.remove(itf.name)
                        os.remove(otf.name)
                except RuntimeError:
                    print("Unexpected error:", sys.exc_info()[0])
                    traceback.print_exc(file=sys.stdout)
                    sys.exc_clear()

    def Predict(self, conll_path):
        with open(conll_path, 'r') as conllFP:
            for iSentence, sentence in enumerate(read_conll_predict(conllFP, self.c2i, self.wordsCount)):
                conll_sentence = [entry for entry in sentence if isinstance(entry, utils.ConllEntry)]

                for entry in conll_sentence:
                    wordvec = self.wlookup[int(self.vocab.get(entry.norm, 0))] if self.wdims > 0 else None
                    last_state = self.char_rnn.predict_sequence([self.clookup[c] for c in entry.idChars])[-1]
                    rev_last_state = self.char_rnn.predict_sequence([self.clookup[c] for c in reversed(entry.idChars)])[-1]
                    entry.vec = concatenate(filter(None, [wordvec, last_state, rev_last_state]))
                    entry.pos_lstms = [entry.vec, entry.vec]
                    entry.headfov = None
                    entry.modfov = None
                    entry.rheadfov = None
                    entry.rmodfov = None

                # Predicted pos tags
                lstm_forward = self.pos_builders[0].initial_state()
                lstm_backward = self.pos_builders[1].initial_state()
                for entry, rentry in zip(conll_sentence, reversed(conll_sentence)):
                    lstm_forward = lstm_forward.add_input(entry.vec)
                    lstm_backward = lstm_backward.add_input(rentry.vec)
                    entry.pos_lstms[1] = lstm_forward.output()
                    rentry.pos_lstms[0] = lstm_backward.output()

                for entry in conll_sentence:
                    entry.pos_vec = concatenate(entry.pos_lstms)

                blstm_forward = self.pos_bbuilders[0].initial_state()
                blstm_backward = self.pos_bbuilders[1].initial_state()
                for entry, rentry in zip(conll_sentence, reversed(conll_sentence)):
                    blstm_forward = blstm_forward.add_input(entry.pos_vec)
                    blstm_backward = blstm_backward.add_input(rentry.pos_vec)
                    entry.pos_lstms[1] = blstm_forward.output()
                    rentry.pos_lstms[0] = blstm_backward.output()

                concat_layer = [concatenate(entry.pos_lstms) for entry in conll_sentence]
                outputFFlayer = self.ffSeqPredictor.predict_sequence(concat_layer)
                predicted_pos_indices = [np.argmax(o.value()) for o in outputFFlayer]
                predicted_postags = [self.id2pos[idx] for idx in predicted_pos_indices]

                # Add predicted pos tags for parsing prediction
                for entry, posid in zip(conll_sentence, predicted_pos_indices):
                    entry.vec = concatenate([entry.vec, self.plookup[posid]])
                    entry.lstms = [entry.vec, entry.vec]

                if self.blstmFlag:
                    lstm_forward = self.builders[0].initial_state()
                    lstm_backward = self.builders[1].initial_state()
                    for entry, rentry in zip(conll_sentence, reversed(conll_sentence)):
                        lstm_forward = lstm_forward.add_input(entry.vec)
                        lstm_backward = lstm_backward.add_input(rentry.vec)
                        entry.lstms[1] = lstm_forward.output()
                        rentry.lstms[0] = lstm_backward.output()

                    if self.bibiFlag:
                        for entry in conll_sentence:
                            entry.vec = concatenate(entry.lstms)
                        blstm_forward = self.bbuilders[0].initial_state()
                        blstm_backward = self.bbuilders[1].initial_state()
                        for entry, rentry in zip(conll_sentence, reversed(conll_sentence)):
                            blstm_forward = blstm_forward.add_input(entry.vec)
                            blstm_backward = blstm_backward.add_input(rentry.vec)
                            entry.lstms[1] = blstm_forward.output()
                            rentry.lstms[0] = blstm_backward.output()

                scores, exprs = self.__evaluate(conll_sentence)
                heads = decoder.parse_proj(scores)

                # Multiple roots: heading to the previous "rooted" one
                rootCount = 0
                rootWid = -1
                for index, head in enumerate(heads):
                    if head == 0:
                        rootCount += 1
                        if rootCount == 1:
                            rootWid = index
                        if rootCount > 1:
                            heads[index] = rootWid
                            rootWid = index

                for entry, head, pos in zip(conll_sentence, heads, predicted_postags):
                    entry.pred_parent_id = head
                    entry.pred_relation = '_'
                    entry.pred_pos = pos

                dump = False

                if self.labelsFlag:
                    concat_layer = [self.__getRelVector(conll_sentence, head, modifier + 1)
                                    for modifier, head in enumerate(heads[1:])]
                    outputFFlayer = self.ffRelPredictor.predict_sequence(concat_layer)
                    predicted_rel_indices = [np.argmax(o.value()) for o in outputFFlayer]
                    predicted_rels = [self.irels[idx] for idx in predicted_rel_indices]
                    for modifier, head in enumerate(heads[1:]):
                        conll_sentence[modifier + 1].pred_relation = predicted_rels[modifier]

                renew_cg()

                if not dump:
                    yield sentence

    def Train(self, conll_path):
        eloss = 0.0
        mloss = 0.0
        eerrors = 0
        etotal = 0
        start = time.time()

        with open(conll_path, 'r') as conllFP:
            shuffledData = list(read_conll(conllFP, self.c2i))
            random.shuffle(shuffledData)

            errs = []
            lerrs = []
            posErrs = []

            for iSentence, sentence in enumerate(shuffledData):
                if iSentence % 500 == 0 and iSentence != 0:
                    print "Processing sentence number: %d" % iSentence, ", Loss: %.4f" % (eloss / etotal), ", Time: %.2f" % (time.time() - start)
                    start = time.time()
                    eerrors = 0
                    eloss = 0.0
                    etotal = 0

                conll_sentence = [entry for entry in sentence if isinstance(entry, utils.ConllEntry)]

                for entry in conll_sentence:
                    # Word dropout: keep a word with probability proportional to its frequency.
                    c = float(self.wordsCount.get(entry.norm, 0))
                    dropFlag = (random.random() < (c / (0.25 + c)))
                    wordvec = self.wlookup[int(self.vocab.get(entry.norm, 0)) if dropFlag else 0] if self.wdims > 0 else None
                    last_state = self.char_rnn.predict_sequence([self.clookup[c] for c in entry.idChars])[-1]
                    rev_last_state = self.char_rnn.predict_sequence([self.clookup[c] for c in reversed(entry.idChars)])[-1]
                    entry.vec = dynet.dropout(concatenate(filter(None, [wordvec, last_state, rev_last_state])), 0.33)
                    entry.pos_lstms = [entry.vec, entry.vec]
                    entry.headfov = None
                    entry.modfov = None
                    entry.rheadfov = None
                    entry.rmodfov = None

                # POS tagging loss
                lstm_forward = self.pos_builders[0].initial_state()
                lstm_backward = self.pos_builders[1].initial_state()
                for entry, rentry in zip(conll_sentence, reversed(conll_sentence)):
                    lstm_forward = lstm_forward.add_input(entry.vec)
                    lstm_backward = lstm_backward.add_input(rentry.vec)
                    entry.pos_lstms[1] = lstm_forward.output()
                    rentry.pos_lstms[0] = lstm_backward.output()

                for entry in conll_sentence:
                    entry.pos_vec = concatenate(entry.pos_lstms)

                blstm_forward = self.pos_bbuilders[0].initial_state()
                blstm_backward = self.pos_bbuilders[1].initial_state()
                for entry, rentry in zip(conll_sentence, reversed(conll_sentence)):
                    blstm_forward = blstm_forward.add_input(entry.pos_vec)
                    blstm_backward = blstm_backward.add_input(rentry.pos_vec)
                    entry.pos_lstms[1] = blstm_forward.output()
                    rentry.pos_lstms[0] = blstm_backward.output()

                concat_layer = [dynet.dropout(concatenate(entry.pos_lstms), 0.33) for entry in conll_sentence]
                outputFFlayer = self.ffSeqPredictor.predict_sequence(concat_layer)
                posIDs = [self.pos.get(entry.pos) for entry in conll_sentence]
                for pred, gold in zip(outputFFlayer, posIDs):
                    posErrs.append(self.pick_neg_log(pred, gold))

                # Add predicted pos tags
                for entry, poses in zip(conll_sentence, outputFFlayer):
                    entry.vec = concatenate([entry.vec, dynet.dropout(self.plookup[np.argmax(poses.value())], 0.33)])
                    entry.lstms = [entry.vec, entry.vec]

                # Parsing losses
                if self.blstmFlag:
                    lstm_forward = self.builders[0].initial_state()
                    lstm_backward = self.builders[1].initial_state()
                    for entry, rentry in zip(conll_sentence, reversed(conll_sentence)):
                        lstm_forward = lstm_forward.add_input(entry.vec)
                        lstm_backward = lstm_backward.add_input(rentry.vec)
                        entry.lstms[1] = lstm_forward.output()
                        rentry.lstms[0] = lstm_backward.output()

                    if self.bibiFlag:
                        for entry in conll_sentence:
                            entry.vec = concatenate(entry.lstms)
                        blstm_forward = self.bbuilders[0].initial_state()
                        blstm_backward = self.bbuilders[1].initial_state()
                        for entry, rentry in zip(conll_sentence, reversed(conll_sentence)):
                            blstm_forward = blstm_forward.add_input(entry.vec)
                            blstm_backward = blstm_backward.add_input(rentry.vec)
                            entry.lstms[1] = blstm_forward.output()
                            rentry.lstms[0] = blstm_backward.output()

                scores, exprs = self.__evaluate(conll_sentence)
                gold = [entry.parent_id for entry in conll_sentence]
                heads = decoder.parse_proj(scores, gold if self.costaugFlag else None)

                if self.labelsFlag:
                    concat_layer = [dynet.dropout(self.__getRelVector(conll_sentence, head, modifier + 1), 0.33)
                                    for modifier, head in enumerate(gold[1:])]
                    outputFFlayer = self.ffRelPredictor.predict_sequence(concat_layer)
                    relIDs = [self.rels[conll_sentence[modifier + 1].relation] for modifier, _ in enumerate(gold[1:])]
                    for pred, goldid in zip(outputFFlayer, relIDs):
                        lerrs.append(self.pick_neg_log(pred, goldid))

                e = sum([1 for h, g in zip(heads[1:], gold[1:]) if h != g])
                eerrors += e
                if e > 0:
                    loss = [(exprs[h][i] - exprs[g][i]) for i, (h, g) in enumerate(zip(heads, gold)) if h != g]  # * (1.0/float(e))
                    eloss += (e)
                    mloss += (e)
                    errs.extend(loss)

                etotal += len(conll_sentence)

                if iSentence % 1 == 0:
                    if len(errs) > 0 or len(lerrs) > 0 or len(posErrs) > 0:
                        eerrs = (esum(errs + lerrs + posErrs))
                        eerrs.scalar_value()
                        eerrs.backward()
                        self.trainer.update()
                        errs = []
                        lerrs = []
                        posErrs = []
                    renew_cg()

        print "Loss: %.4f" % (mloss / iSentence)
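# For orientation, a minimal end-to-end usage sketch of the class above. The
# option names mirror exactly the attributes __init__ reads; the values are
# arbitrary, and build_vocab is a hypothetical stand-in for whatever
# vocabulary loader the surrounding repo provides around read_conll /
# utils.ConllEntry -- it is not a verified API.

import argparse

options = argparse.Namespace(
    activation='tanh', lstm_dims=128, lstm_layers=2,
    wembedding_dims=100, cembedding_dims=50, pembedding_dims=25,
    hidden_units=100, external_embedding=None,
    blstmFlag=True, labelsFlag=True, costaugFlag=True, bibiFlag=True)

# Hypothetical loader returning word counts, index maps and tag/label inventories.
words, w2i, c2i, pos, rels = build_vocab("train.conll")

parser = jPosDepLearner(words, pos, rels, w2i, c2i, options)
for epoch in range(10):
    parser.Train("train.conll")
    parser.Save("model.epoch%d" % epoch)

# Predict yields sentences whose entries carry pred_parent_id,
# pred_relation and pred_pos.
for sentence in parser.Predict("test.conll"):
    pass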
class jPosDepLearner:
    def __init__(self, vocab, pos, rels, w2i, c2i, options):
        self.model = ParameterCollection()
        random.seed(1)
        self.trainer = AdamTrainer(self.model)
        #self.trainer = SimpleSGDTrainer(self.model)
        self.activations = {'tanh': tanh, 'sigmoid': logistic, 'relu': rectify,
                            'tanh3': (lambda x: tanh(cwise_multiply(cwise_multiply(x, x), x)))}
        self.activation = self.activations[options.activation]
        self.blstmFlag = options.blstmFlag
        self.labelsFlag = options.labelsFlag
        self.costaugFlag = options.costaugFlag
        self.bibiFlag = options.bibiFlag
        self.ldims = options.lstm_dims
        self.wdims = options.wembedding_dims
        self.cdims = options.cembedding_dims
        self.layers = options.lstm_layers
        self.wordsCount = vocab
        self.vocab = {word: ind + 3 for word, ind in w2i.iteritems()}
        self.pos = {word: ind for ind, word in enumerate(pos)}
        self.id2pos = {ind: word for ind, word in enumerate(pos)}
        self.c2i = c2i
        self.rels = {word: ind for ind, word in enumerate(rels)}
        self.irels = rels

        self.external_embedding, self.edim = None, 0
        if options.external_embedding is not None:
            external_embedding_fp = open(options.external_embedding, 'r')
            external_embedding_fp.readline()
            self.external_embedding = {line.split(' ')[0]: [float(f) for f in line.strip().split(' ')[1:]]
                                       for line in external_embedding_fp}
            external_embedding_fp.close()

            self.edim = len(self.external_embedding.values()[0])
            self.noextrn = [0.0 for _ in xrange(self.edim)]
            self.extrnd = {word: i + 3 for i, word in enumerate(self.external_embedding)}
            self.elookup = self.model.add_lookup_parameters((len(self.external_embedding) + 3, self.edim))
            for word, i in self.extrnd.iteritems():
                self.elookup.init_row(i, self.external_embedding[word])
            self.extrnd['*PAD*'] = 1
            self.extrnd['*INITIAL*'] = 2

            print 'Load external embedding. Vector dimensions', self.edim

        if self.bibiFlag:
            self.builders = [VanillaLSTMBuilder(1, self.wdims + self.edim + self.cdims * 2, self.ldims, self.model),
                             VanillaLSTMBuilder(1, self.wdims + self.edim + self.cdims * 2, self.ldims, self.model)]
            self.bbuilders = [VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model),
                              VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model)]
        elif self.layers > 0:
            self.builders = [VanillaLSTMBuilder(self.layers, self.wdims + self.edim, self.ldims, self.model),
                             VanillaLSTMBuilder(self.layers, self.wdims + self.edim, self.ldims, self.model)]
        else:
            self.builders = [SimpleRNNBuilder(1, self.wdims + self.edim + self.cdims * 2, self.ldims, self.model),
                             SimpleRNNBuilder(1, self.wdims + self.edim + self.cdims * 2, self.ldims, self.model)]

        self.ffSeqPredictor = FFSequencePredictor(Layer(self.model, self.ldims * 2, len(self.pos), softmax))

        self.hidden_units = options.hidden_units
        self.hidden2_units = options.hidden2_units

        self.vocab['*PAD*'] = 1
        self.vocab['*INITIAL*'] = 2

        self.wlookup = self.model.add_lookup_parameters((len(vocab) + 3, self.wdims))
        self.clookup = self.model.add_lookup_parameters((len(c2i), self.cdims))

        # Arc scorer: separate projections for the head and modifier views
        self.hidLayerFOH = self.model.add_parameters((self.hidden_units, self.ldims * 2))
        self.hidLayerFOM = self.model.add_parameters((self.hidden_units, self.ldims * 2))
        self.hidBias = self.model.add_parameters((self.hidden_units))
        self.hid2Layer = self.model.add_parameters((self.hidden2_units, self.hidden_units))
        self.hid2Bias = self.model.add_parameters((self.hidden2_units))
        self.outLayer = self.model.add_parameters((1, self.hidden2_units if self.hidden2_units > 0 else self.hidden_units))

        if self.labelsFlag:
            self.rhidLayerFOH = self.model.add_parameters((self.hidden_units, 2 * self.ldims))
            self.rhidLayerFOM = self.model.add_parameters((self.hidden_units, 2 * self.ldims))
            self.rhidBias = self.model.add_parameters((self.hidden_units))
            self.rhid2Layer = self.model.add_parameters((self.hidden2_units, self.hidden_units))
            self.rhid2Bias = self.model.add_parameters((self.hidden2_units))
            self.routLayer = self.model.add_parameters(
                (len(self.irels), self.hidden2_units if self.hidden2_units > 0 else self.hidden_units))
            self.routBias = self.model.add_parameters((len(self.irels)))

        self.char_rnn = RNNSequencePredictor(LSTMBuilder(1, self.cdims, self.cdims, self.model))

    def __getExpr(self, sentence, i, j, train):
        if sentence[i].headfov is None:
            sentence[i].headfov = self.hidLayerFOH.expr() * concatenate([sentence[i].lstms[0], sentence[i].lstms[1]])
        if sentence[j].modfov is None:
            sentence[j].modfov = self.hidLayerFOM.expr() * concatenate([sentence[j].lstms[0], sentence[j].lstms[1]])

        if self.hidden2_units > 0:
            output = self.outLayer.expr() * self.activation(
                self.hid2Bias.expr() + self.hid2Layer.expr() * self.activation(
                    sentence[i].headfov + sentence[j].modfov + self.hidBias.expr()))  # + self.outBias
        else:
            output = self.outLayer.expr() * self.activation(
                sentence[i].headfov + sentence[j].modfov + self.hidBias.expr())  # + self.outBias

        return output

    def __evaluate(self, sentence, train):
        exprs = [[self.__getExpr(sentence, i, j, train) for j in xrange(len(sentence))]
                 for i in xrange(len(sentence))]
        scores = np.array([[output.scalar_value() for output in exprsRow] for exprsRow in exprs])
        return scores, exprs

    def pick_neg_log(self, pred, gold):
        return -dynet.log(dynet.pick(pred, gold))

    def __evaluateLabel(self, sentence, i, j):
        if sentence[i].rheadfov is None:
            sentence[i].rheadfov = self.rhidLayerFOH.expr() * concatenate([sentence[i].lstms[0], sentence[i].lstms[1]])
        if sentence[j].rmodfov is None:
            sentence[j].rmodfov = self.rhidLayerFOM.expr() * concatenate([sentence[j].lstms[0], sentence[j].lstms[1]])

        if self.hidden2_units > 0:
            output = self.routLayer.expr() * self.activation(
                self.rhid2Bias.expr() + self.rhid2Layer.expr() * self.activation(
                    sentence[i].rheadfov + sentence[j].rmodfov + self.rhidBias.expr())) + self.routBias.expr()
        else:
            output = self.routLayer.expr() * self.activation(
                sentence[i].rheadfov + sentence[j].rmodfov + self.rhidBias.expr()) + self.routBias.expr()

        return output.value(), output

    def Save(self, filename):
        self.model.save(filename)

    def Load(self, filename):
        self.model.populate(filename)

    def Predict(self, conll_path):
        with open(conll_path, 'r') as conllFP:
            for iSentence, sentence in enumerate(read_conll(conllFP, self.c2i)):
                conll_sentence = [entry for entry in sentence if isinstance(entry, utils.ConllEntry)]

                for entry in conll_sentence:
                    wordvec = self.wlookup[int(self.vocab.get(entry.norm, 0))] if self.wdims > 0 else None
                    evec = self.elookup[int(self.extrnd.get(entry.form, self.extrnd.get(entry.norm, 0)))] \
                        if self.external_embedding is not None else None
                    last_state = self.char_rnn.predict_sequence([self.clookup[c] for c in entry.idChars])[-1]
                    rev_last_state = self.char_rnn.predict_sequence([self.clookup[c] for c in reversed(entry.idChars)])[-1]
                    entry.vec = concatenate(filter(None, [wordvec, evec, last_state, rev_last_state]))
                    entry.lstms = [entry.vec, entry.vec]
                    entry.headfov = None
                    entry.modfov = None
                    entry.rheadfov = None
                    entry.rmodfov = None

                if self.blstmFlag:
                    lstm_forward = self.builders[0].initial_state()
                    lstm_backward = self.builders[1].initial_state()
                    for entry, rentry in zip(conll_sentence, reversed(conll_sentence)):
                        lstm_forward = lstm_forward.add_input(entry.vec)
                        lstm_backward = lstm_backward.add_input(rentry.vec)
                        entry.lstms[1] = lstm_forward.output()
                        rentry.lstms[0] = lstm_backward.output()

                    if self.bibiFlag:
                        for entry in conll_sentence:
                            entry.vec = concatenate(entry.lstms)
                        blstm_forward = self.bbuilders[0].initial_state()
                        blstm_backward = self.bbuilders[1].initial_state()
                        for entry, rentry in zip(conll_sentence, reversed(conll_sentence)):
                            blstm_forward = blstm_forward.add_input(entry.vec)
                            blstm_backward = blstm_backward.add_input(rentry.vec)
                            entry.lstms[1] = blstm_forward.output()
                            rentry.lstms[0] = blstm_backward.output()

                scores, exprs = self.__evaluate(conll_sentence, True)
                heads = decoder.parse_proj(scores)

                # Multiple roots: heading to the previous "rooted" one
                rootCount = 0
                rootWid = -1
                for index, head in enumerate(heads):
                    if head == 0:
                        rootCount += 1
                        if rootCount == 1:
                            rootWid = index
                        if rootCount > 1:
                            heads[index] = rootWid
                            rootWid = index

                concat_layer = [concatenate(entry.lstms) for entry in conll_sentence]
                outputFFlayer = self.ffSeqPredictor.predict_sequence(concat_layer)
                predicted_pos_indices = [np.argmax(o.value()) for o in outputFFlayer]
                predicted_postags = [self.id2pos[idx] for idx in predicted_pos_indices]

                for entry, head, pos in zip(conll_sentence, heads, predicted_postags):
                    entry.pred_parent_id = head
                    entry.pred_relation = '_'
                    entry.pred_pos = pos

                dump = False

                if self.labelsFlag:
                    for modifier, head in enumerate(heads[1:]):
                        scores, exprs = self.__evaluateLabel(conll_sentence, head, modifier + 1)
                        conll_sentence[modifier + 1].pred_relation = self.irels[max(enumerate(scores), key=itemgetter(1))[0]]

                renew_cg()

                if not dump:
                    yield sentence

    def Train(self, conll_path):
        errors = 0
        batch = 0
        eloss = 0.0
        mloss = 0.0
        eerrors = 0
        etotal = 0
        start = time.time()

        with open(conll_path, 'r') as conllFP:
            shuffledData = list(read_conll(conllFP, self.c2i))
            random.shuffle(shuffledData)

            errs = []
            lerrs = []
            posErrs = []
            eeloss = 0.0

            for iSentence, sentence in enumerate(shuffledData):
                if iSentence % 500 == 0 and iSentence != 0:
                    print "Processing sentence number: %d" % iSentence, ", Loss: %.2f" % (eloss / etotal), ", Time: %.2f" % (time.time() - start)
                    start = time.time()
                    eerrors = 0
                    eloss = 0.0
                    etotal = 0
                    lerrors = 0
                    ltotal = 0

                conll_sentence = [entry for entry in sentence if isinstance(entry, utils.ConllEntry)]

                for entry in conll_sentence:
                    c = float(self.wordsCount.get(entry.norm, 0))
                    dropFlag = (random.random() < (c / (0.25 + c)))
                    wordvec = self.wlookup[int(self.vocab.get(entry.norm, 0)) if dropFlag else 0] if self.wdims > 0 else None
                    evec = None
                    if self.external_embedding is not None:
                        evec = self.elookup[self.extrnd.get(entry.form, self.extrnd.get(entry.norm, 0))
                                            if (dropFlag or (random.random() < 0.5)) else 0]
                    #entry.vec = concatenate(filter(None, [wordvec, evec]))
                    last_state = self.char_rnn.predict_sequence([self.clookup[c] for c in entry.idChars])[-1]
                    rev_last_state = self.char_rnn.predict_sequence([self.clookup[c] for c in reversed(entry.idChars)])[-1]
                    entry.vec = concatenate([dynet.noise(fe, 0.2) for fe in
                                             filter(None, [wordvec, evec, last_state, rev_last_state])])
                    entry.lstms = [entry.vec, entry.vec]
                    entry.headfov = None
                    entry.modfov = None
                    entry.rheadfov = None
                    entry.rmodfov = None

                if self.blstmFlag:
                    lstm_forward = self.builders[0].initial_state()
                    lstm_backward = self.builders[1].initial_state()
                    for entry, rentry in zip(conll_sentence, reversed(conll_sentence)):
                        lstm_forward = lstm_forward.add_input(entry.vec)
                        lstm_backward = lstm_backward.add_input(rentry.vec)
                        entry.lstms[1] = lstm_forward.output()
                        rentry.lstms[0] = lstm_backward.output()

                    if self.bibiFlag:
                        for entry in conll_sentence:
                            entry.vec = concatenate(entry.lstms)
                        blstm_forward = self.bbuilders[0].initial_state()
                        blstm_backward = self.bbuilders[1].initial_state()
                        for entry, rentry in zip(conll_sentence, reversed(conll_sentence)):
                            blstm_forward = blstm_forward.add_input(entry.vec)
                            blstm_backward = blstm_backward.add_input(rentry.vec)
                            entry.lstms[1] = blstm_forward.output()
                            rentry.lstms[0] = blstm_backward.output()

                scores, exprs = self.__evaluate(conll_sentence, True)
                gold = [entry.parent_id for entry in conll_sentence]
                heads = decoder.parse_proj(scores, gold if self.costaugFlag else None)

                if self.labelsFlag:
                    for modifier, head in enumerate(gold[1:]):
                        rscores, rexprs = self.__evaluateLabel(conll_sentence, head, modifier + 1)
                        goldLabelInd = self.rels[conll_sentence[modifier + 1].relation]
                        wrongLabelInd = max(((l, scr) for l, scr in enumerate(rscores) if l != goldLabelInd),
                                            key=itemgetter(1))[0]
                        if rscores[goldLabelInd] < rscores[wrongLabelInd] + 1:
                            lerrs.append(rexprs[wrongLabelInd] - rexprs[goldLabelInd])

                e = sum([1 for h, g in zip(heads[1:], gold[1:]) if h != g])
                eerrors += e
                if e > 0:
                    loss = [(exprs[h][i] - exprs[g][i]) for i, (h, g) in enumerate(zip(heads, gold)) if h != g]  # * (1.0/float(e))
                    eloss += (e)
                    mloss += (e)
                    errs.extend(loss)

                etotal += len(conll_sentence)

                concat_layer = [concatenate(entry.lstms) for entry in conll_sentence]
                concat_layer = [dynet.noise(fe, 0.2) for fe in concat_layer]
                outputFFlayer = self.ffSeqPredictor.predict_sequence(concat_layer)
                posIDs = [self.pos.get(entry.pos) for entry in conll_sentence]
                for pred, gold in zip(outputFFlayer, posIDs):
                    posErrs.append(self.pick_neg_log(pred, gold))

                if iSentence % 1 == 0 or len(errs) > 0 or len(lerrs) > 0 or len(posErrs) > 0:
                    eeloss = 0.0
                    if len(errs) > 0 or len(lerrs) > 0 or len(posErrs) > 0:
                        eerrs = (esum(errs + lerrs + posErrs))  # * (1.0/(float(len(errs))))
                        eerrs.scalar_value()
                        eerrs.backward()
                        self.trainer.update()
                        errs = []
                        lerrs = []
                        posErrs = []
                    renew_cg()

            if len(errs) > 0:
                eerrs = (esum(errs + lerrs + posErrs))  # * (1.0/(float(len(errs))))
                eerrs.scalar_value()
                eerrs.backward()
                self.trainer.update()
                errs = []
                lerrs = []
                posErrs = []
                eeloss = 0.0
                renew_cg()

            self.trainer.update()

        print "Loss: %.2f" % (mloss / iSentence)
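# Train above mixes three objectives: a structured hinge over heads
# (errs), a margin-1 hinge over relation labels (lerrs), and a tagging
# cross-entropy (posErrs). The structured pieces are easiest to see in
# plain numpy; this is an illustration of the same arithmetic on plain
# arrays, not code from the model (DyNet expressions replace the floats
# in the real training loop).

import numpy as np

def head_hinge_terms(scores, gold_heads, pred_heads):
    # One error term per token whose (cost-augmented) predicted head differs
    # from gold: score(pred_head, tok) - score(gold_head, tok), mirroring
    # the errs.extend(...) branch.
    return [scores[h][i] - scores[g][i]
            for i, (h, g) in enumerate(zip(pred_heads, gold_heads)) if h != g]

def label_hinge(rscores, gold_label):
    # Margin-1 hinge between the gold relation and the best wrong one,
    # mirroring the lerrs.append(...) branch.
    wrong = max((l for l in range(len(rscores)) if l != gold_label),
                key=lambda l: rscores[l])
    return max(0.0, 1.0 + rscores[wrong] - rscores[gold_label])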
class jPosDepLearner: def __init__(self, vocab, pos, rels, w2i, c2i, caps, options): self.model = ParameterCollection() random.seed(1) self.trainer = AdamTrainer(self.model) #if options.learning_rate is not None: # self.trainer = AdamTrainer(self.model, alpha=options.learning_rate) # print("Adam initial learning rate:", options.learning_rate) self.activations = { 'tanh': tanh, 'sigmoid': logistic, 'relu': rectify, 'tanh3': (lambda x: tanh(cwise_multiply(cwise_multiply(x, x), x))) } self.activation = self.activations[options.activation] self.blstmFlag = options.blstmFlag self.labelsFlag = options.labelsFlag self.costaugFlag = options.costaugFlag self.bibiFlag = options.bibiFlag self.depFlag = options.depFlag self.sNerFlag = options.sNerFlag self.ldims = options.lstm_dims self.wdims = options.wembedding_dims self.cdims = options.cembedding_dims self.reldims = options.relembedding_dims self.layers = options.lstm_layers self.wordsCount = vocab self.vocab = {word: ind + 3 for word, ind in w2i.iteritems()} self.pos = {word: ind for ind, word in enumerate(pos)} self.id2pos = {ind: word for ind, word in enumerate(pos)} self.c2i = c2i self.caps = {word: ind for ind, word in enumerate(caps)} self.rels = {word: ind for ind, word in enumerate(rels)} self.irels = rels self.pdims = options.pembedding_dims self.vocab['*PAD*'] = 1 self.vocab['*INITIAL*'] = 2 self.wlookup = self.model.add_lookup_parameters( (len(vocab) + 3, self.wdims)) self.clookup = self.model.add_lookup_parameters((len(c2i), self.cdims)) self.plookup = self.model.add_lookup_parameters((len(pos), self.pdims)) self.caps_lookup = self.model.add_lookup_parameters( (len(caps), self.cdims)) transition_array = np.random.rand(len(pos) + 2, len(pos) + 2) #cap_array=np.random.rand(len(caps),len(pos)) def normalizeprobs(arr): return np.array([np.divide(arr1, sum(arr1)) for arr1 in arr]) self.nertrans_lookup = self.model.add_lookup_parameters( (len(pos) + 2, len(pos) + 2)) #self.caplookup = self.model.lookup_parameters_from_numpy(normalizeprobs(cap_array)) if options.external_embedding is not None: ext_embeddings, ext_emb_dim = load_embeddings_file( options.external_embedding, lower=True) assert (ext_emb_dim == self.wdims) print("Initializing word embeddings by pre-trained vectors") count = 0 for word in self.vocab: _word = unicode(word, "utf-8") if _word in ext_embeddings: count += 1 self.wlookup.init_row(self.vocab[word], ext_embeddings[_word]) print("Vocab size: %d; #words having pretrained vectors: %d" % (len(self.vocab), count)) self.ffSeqPredictor = FFSequencePredictor( Layer(self.model, self.ldims * 2, len(self.pos), "idt")) self.hidden_units = options.hidden_units if not self.depFlag: self.pos_builders = [ VanillaLSTMBuilder(1, self.wdims + self.cdims * 3, self.ldims, self.model), VanillaLSTMBuilder(1, self.wdims + self.cdims * 3, self.ldims, self.model) ] self.pos_bbuilders = [ VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model), VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model) ] self.ffSeqPredictor = FFSequencePredictor( Layer(self.model, self.ldims * 2, len(self.pos), softmax)) self.hidden_units = options.hidden_units if self.depFlag: if self.bibiFlag: self.builders = [ VanillaLSTMBuilder(1, self.wdims + self.cdims * 3, self.ldims, self.model), VanillaLSTMBuilder(1, self.wdims + self.cdims * 3, self.ldims, self.model) ] self.bbuilders = [ VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model), VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model) ] elif self.layers > 0: self.builders = [ 
VanillaLSTMBuilder(self.layers, self.wdims + self.cdims * 3, self.ldims, self.model), VanillaLSTMBuilder(self.layers, self.wdims + self.cdims * 3, self.ldims, self.model) ] else: self.builders = [ SimpleRNNBuilder(1, self.wdims + self.cdims * 3, self.ldims, self.model), SimpleRNNBuilder(1, self.wdims + self.cdims * 3, self.ldims, self.model) ] self.hidBias = self.model.add_parameters((self.ldims * 8)) self.hidLayer = self.model.add_parameters( (self.hidden_units, self.ldims * 8)) self.hid2Bias = self.model.add_parameters((self.hidden_units)) self.outLayer = self.model.add_parameters( (1, self.hidden_units if self.hidden_units > 0 else self.ldims * 8)) if self.labelsFlag: self.rhidBias = self.model.add_parameters((self.ldims * 8)) self.rhidLayer = self.model.add_parameters( (self.hidden_units, self.ldims * 8)) self.rhid2Bias = self.model.add_parameters((self.hidden_units)) self.routLayer = self.model.add_parameters( (len(self.irels), self.hidden_units if self.hidden_units > 0 else self.ldims * 8)) self.routBias = self.model.add_parameters((len(self.irels))) self.ffRelPredictor = FFSequencePredictor( Layer( self.model, self.hidden_units if self.hidden_units > 0 else self.ldims * 8, len(self.irels), softmax)) if self.sNerFlag: self.sner_builders = [ VanillaLSTMBuilder( 1, self.wdims + self.cdims * 3 + self.reldims, self.ldims, self.model), VanillaLSTMBuilder( 1, self.wdims + self.cdims * 3 + self.reldims, self.ldims, self.model) ] self.sner_bbuilders = [ VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model), VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model) ] ##relation embeddings self.rellookup = self.model.add_lookup_parameters( (len(self.rels), self.reldims)) self.char_rnn = RNNSequencePredictor( LSTMBuilder(1, self.cdims, self.cdims, self.model)) def __getExpr(self, sentence, i, j): if sentence[i].headfov is None: sentence[i].headfov = concatenate( [sentence[i].lstms[0], sentence[i].lstms[1]]) if sentence[j].modfov is None: sentence[j].modfov = concatenate( [sentence[j].lstms[0], sentence[j].lstms[1]]) _inputVector = concatenate([ sentence[i].headfov, sentence[j].modfov, dynet.abs(sentence[i].headfov - sentence[j].modfov), dynet.cmult(sentence[i].headfov, sentence[j].modfov) ]) if self.hidden_units > 0: output = self.outLayer.expr() * self.activation( self.hid2Bias.expr() + self.hidLayer.expr() * self.activation(_inputVector + self.hidBias.expr())) else: output = self.outLayer.expr() * self.activation( _inputVector + self.hidBias.expr()) return output def __evaluate(self, sentence): exprs = [[ self.__getExpr(sentence, i, j) for j in xrange(len(sentence)) ] for i in xrange(len(sentence))] scores = np.array([[output.scalar_value() for output in exprsRow] for exprsRow in exprs]) return scores, exprs def pick_neg_log(self, pred, gold): return -dynet.log(dynet.pick(pred, gold)) def pick_neg_log_2(self, pred_param, gold): gold_arr = inputVector( [1 if gold == i else 0 for i in range(len(self.pos) + 2)]) x = scalarInput(1) pred_arr = softmax(pred_param * x) return -dynet.log(transpose(pred_arr) * gold_arr) def pick_gold_score(self, preds, golds): score = 0 prev_tag = len(self.pos) for pred, gold in zip(preds, golds): score += dynet.pick(pred, gold) + dynet.pick( self.nertrans_lookup[gold], prev_tag) prev_tag = gold score += dynet.pick(self.nertrans_lookup[len(self.pos) + 1], prev_tag) return score def pick_crf_score(self, preds, golds): return dynet.exp( self.pick_gold_tag_score(preds, golds) + self.pick_gold_trans_score(golds)) def forward_score(self, preds): def 
log_sum_exp(tag_score_arr): argmax = np.argmax(tag_score_arr.value()) max_score = tag_score_arr[argmax] score = max_score max_arr = dynet.concatenate( [max_score for i in range(len(self.pos) + 2)]) score += dynet.log( dynet.sum_dim(dynet.exp(tag_score_arr - max_arr), [0])) return score score = 0 len1 = len(self.pos) + 2 for_score = [-1e10 for i in range(len1)] for_score[-2] = 0 #print(len(preds)) for i, pred in enumerate(preds): tag_scores = [dynet.scalarInput(-1e10) for j in range(len1)] for i, score in enumerate(pred): tag_score = dynet.concatenate([ score + dynet.pick(self.nertrans_lookup[i], prev_tag) + for_score[prev_tag] for prev_tag in range(len1) ]) log_1 = log_sum_exp(tag_score) tag_scores[i] = log_1 #print("tag score: %f"%log_1.value()) for_score = tag_scores #print(dynet.concatenate(for_score).value()) term_exp = dynet.concatenate([ score + tr for score, tr in zip(for_score, self.nertrans_lookup[len(self.pos) + 1]) ]) term_score = log_sum_exp(term_exp) #print("score : %f"%term_score.value()) return term_score def nextPerm(self, perm1, taglen): a = [] for ind, x in enumerate(reversed(perm1)): if x < taglen - 1: for i in range(len(perm1) - ind - 1): a.append(perm1[i]) a.append(x + 1) for i in range(ind): a.append(0) return a return -1 ## takes toooo long def forward_score2(self, taglen, senlen, preds): score = 0 perm1 = [0 for i in range(senlen)] while perm1 != -1: score += self.pick_crf_score(preds, perm1) perm1 = self.nextPerm(perm1, taglen) return score def __getRelVector(self, sentence, i, j): if sentence[i].rheadfov is None: sentence[i].rheadfov = concatenate( [sentence[i].lstms[0], sentence[i].lstms[1]]) if sentence[j].rmodfov is None: sentence[j].rmodfov = concatenate( [sentence[j].lstms[0], sentence[j].lstms[1]]) _outputVector = concatenate([ sentence[i].rheadfov, sentence[j].rmodfov, abs(sentence[i].rheadfov - sentence[j].rmodfov), cmult(sentence[i].rheadfov, sentence[j].rmodfov) ]) if self.hidden_units > 0: return self.rhid2Bias.expr() + self.rhidLayer.expr( ) * self.activation(_outputVector + self.rhidBias.expr()) else: return _outputVector def Save(self, filename): self.model.save(filename) def Load(self, filename): self.model.populate(filename) def Predict(self, conll_path, dep_epoch=1, ner_epoch=1): with open(conll_path, 'r') as conllFP: if ner_epoch == 0: read_conll_nerdep = read_conll_predict(conllFP, self.c2i, self.wordsCount) else: read_conll_nerdep = read_conll_predict_ner( conllFP, self.c2i, self.wordsCount) for iSentence, sentence in enumerate(read_conll_nerdep): conll_sentence = [ entry for entry in sentence if isinstance(entry, utils.ConllEntry) ] for entry in conll_sentence: capvec = self.caps_lookup[entry.capInfo] wordvec = self.wlookup[int(self.vocab.get( entry.norm, 0))] if self.wdims > 0 else None last_state = self.char_rnn.predict_sequence( [self.clookup[c] for c in entry.idChars])[-1] rev_last_state = self.char_rnn.predict_sequence( [self.clookup[c] for c in reversed(entry.idChars)])[-1] entry.vec = concatenate( filter(None, [wordvec, last_state, rev_last_state, capvec])) entry.vec2 = concatenate( filter(None, [wordvec, last_state, rev_last_state, capvec])) entry.pos_lstms = [entry.vec, entry.vec] entry.headfov = None entry.modfov = None entry.rheadfov = None entry.rmodfov = None if not self.depFlag: #Predicted pos tags lstm_forward = self.pos_builders[0].initial_state() lstm_backward = self.pos_builders[1].initial_state() for entry, rentry in zip(conll_sentence, reversed(conll_sentence)): lstm_forward = lstm_forward.add_input(entry.vec) lstm_backward 
= lstm_backward.add_input(rentry.vec) entry.pos_lstms[1] = lstm_forward.output() rentry.pos_lstms[0] = lstm_backward.output() for entry in conll_sentence: entry.pos_vec = concatenate(entry.pos_lstms) blstm_forward = self.pos_bbuilders[0].initial_state() blstm_backward = self.pos_bbuilders[1].initial_state() for entry, rentry in zip(conll_sentence, reversed(conll_sentence)): blstm_forward = blstm_forward.add_input(entry.pos_vec) blstm_backward = blstm_backward.add_input( rentry.pos_vec) entry.pos_lstms[1] = blstm_forward.output() rentry.pos_lstms[0] = blstm_backward.output() concat_layer = [ concatenate(entry.pos_lstms) for entry in conll_sentence ] #cap_info_sentence=[self.caplookup[entry.capInfo] for entry in conll_sentence] outputFFlayer = self.ffSeqPredictor.predict_sequence( concat_layer) best_parentids, bestscores = self.ffSeqPredictor.viterbi_sequence( outputFFlayer, self.nertrans_lookup) predicted_pos_indices = [ np.argmax(o.value()) for o in outputFFlayer ] root_predicted_postags = ["O"] predicted_postags = [ self.id2pos[idx] for idx in best_parentids ] for pos in predicted_postags: root_predicted_postags.append(pos) if iSentence < 5: for word, tag in zip(conll_sentence, root_predicted_postags): print("word : {} gold : {} pred : {}".format( word.form, word.pos, tag)) for entry, pos in zip(conll_sentence, root_predicted_postags): entry.pred_pos = pos dump = False if self.depFlag: # Add predicted pos tags for parsing prediction #for entry, posid in zip(conll_sentence, viterbi_pred_tagids): # entry.vec = concatenate([entry.vec, self.plookup[posid]]) # entry.lstms = [entry.vec, entry.vec] for entry in conll_sentence: entry.lstms = [entry.vec, entry.vec] if self.blstmFlag: lstm_forward = self.builders[0].initial_state() lstm_backward = self.builders[1].initial_state() for entry, rentry in zip(conll_sentence, reversed(conll_sentence)): lstm_forward = lstm_forward.add_input(entry.vec) lstm_backward = lstm_backward.add_input(rentry.vec) entry.lstms[1] = lstm_forward.output() rentry.lstms[0] = lstm_backward.output() if self.bibiFlag: for entry in conll_sentence: entry.vec = concatenate(entry.lstms) blstm_forward = self.bbuilders[0].initial_state() blstm_backward = self.bbuilders[1].initial_state() for entry, rentry in zip(conll_sentence, reversed(conll_sentence)): blstm_forward = blstm_forward.add_input( entry.vec) blstm_backward = blstm_backward.add_input( rentry.vec) entry.lstms[1] = blstm_forward.output() rentry.lstms[0] = blstm_backward.output() scores, exprs = self.__evaluate(conll_sentence) heads = decoder.parse_proj(scores) # Multiple roots: heading to the previous "rooted" one rootCount = 0 rootWid = -1 for index, head in enumerate(heads): if head == 0: rootCount += 1 if rootCount == 1: rootWid = index if rootCount > 1: heads[index] = rootWid rootWid = index for entry, head in zip(conll_sentence, heads): entry.pred_parent_id = head entry.pred_relation = '_' #entry.pred_pos = pos if self.labelsFlag: concat_layer = [ self.__getRelVector(conll_sentence, head, modifier + 1) for modifier, head in enumerate(heads[1:]) ] outputFFlayer = self.ffRelPredictor.predict_sequence( concat_layer) predicted_rel_indices = [ np.argmax(o.value()) for o in outputFFlayer ] predicted_rels = [ self.irels[idx] for idx in predicted_rel_indices ] for modifier, head in enumerate(heads[1:]): conll_sentence[ modifier + 1].pred_relation = predicted_rels[modifier] if self.sNerFlag and ner_epoch == 1: conll_sentence[0].vec = concatenate([ conll_sentence[0].vec2, self.rellookup[self.rels["rroot"]] ]) for entry, pred 
in zip(conll_sentence[1:], predicted_rel_indices): relvec = self.rellookup[pred] # for entry, posid in zip(conll_sentence, viterbi_pred_tagids): entry.vec = concatenate([entry.vec2, relvec]) for entry in conll_sentence: entry.ner2_lstms = [entry.vec, entry.vec] slstm_forward = self.sner_builders[0].initial_state() slstm_backward = self.sner_builders[1].initial_state() for entry, rentry in zip(conll_sentence, reversed(conll_sentence)): lstm_forward = slstm_forward.add_input(entry.vec) lstm_backward = slstm_backward.add_input( rentry.vec) entry.ner2_lstms[1] = lstm_forward.output() rentry.ner2_lstms[0] = lstm_backward.output() for entry in conll_sentence: entry.ner2_vec = concatenate(entry.ner2_lstms) sblstm_forward = self.sner_bbuilders[0].initial_state() sblstm_backward = self.sner_bbuilders[1].initial_state( ) for entry, rentry in zip(conll_sentence, reversed(conll_sentence)): blstm_forward = sblstm_forward.add_input( entry.ner2_vec) blstm_backward = sblstm_backward.add_input( rentry.ner2_vec) entry.ner2_lstms[1] = blstm_forward.output() rentry.ner2_lstms[0] = blstm_backward.output() concat_layer = [ dynet.dropout(concatenate(entry.ner2_lstms), 0.33) for entry in conll_sentence ] outputFFlayer = self.ffSeqPredictor.predict_sequence( concat_layer) best_parentids, bestscores = self.ffSeqPredictor.viterbi_sequence( outputFFlayer, self.nertrans_lookup) predicted_pos_indices = [ np.argmax(o.value()) for o in outputFFlayer ] root_predicted_postags = ["O"] predicted_postags = [ self.id2pos[idx] for idx in best_parentids ] for pos in predicted_postags: root_predicted_postags.append(pos) if iSentence < 1: for word, tag in zip(conll_sentence, root_predicted_postags): print("word : {} gold : {} pred : {}".format( word.form, word.pos, tag)) for entry, pos in zip(conll_sentence, root_predicted_postags): entry.pred_pos = pos dump = False renew_cg() if not dump: yield sentence def Train(self, conll_path, dep_epoch=0, ner_epoch=0): eloss = 0.0 mloss = 0.0 eerrors = 0 etotal = 0 start = time.time() dep_epoch = dep_epoch ner_epoch = ner_epoch with open(conll_path, 'r') as conllFP: if ner_epoch == 0: read_conll_nerdep = read_conll(conllFP, self.c2i) else: read_conll_nerdep = read_conll_ner(conllFP, self.c2i) shuffledData = list(read_conll_nerdep) random.shuffle(shuffledData) errs = [] lerrs = [] posErrs = 0 postrErrs = [] nertr2Errs = [] ner2Errs = dynet.inputVector([0]) startind = 0 e = 0 for iSentence, sentence in enumerate(shuffledData): if iSentence % 500 == 0 and iSentence != 0: print "Processing sentence number: %d" % iSentence, ", Loss: %.4f" % ( eloss / etotal), ", Time: %.2f" % (time.time() - start) start = time.time() eerrors = 0 eloss = 0.0 etotal = 0 conll_sentence = [ entry for entry in sentence if isinstance(entry, utils.ConllEntry) ] for entry in conll_sentence: c = float(self.wordsCount.get(entry.norm, 0)) dropFlag = (random.random() < (c / (0.25 + c))) capvec = self.caps_lookup[entry.capInfo] wordvec = self.wlookup[ int(self.vocab.get(entry.norm, 0) ) if dropFlag else 0] if self.wdims > 0 else None last_state = self.char_rnn.predict_sequence( [self.clookup[c] for c in entry.idChars])[-1] rev_last_state = self.char_rnn.predict_sequence( [self.clookup[c] for c in reversed(entry.idChars)])[-1] entry.vec = dynet.dropout( concatenate( filter( None, [wordvec, last_state, rev_last_state, capvec ])), 0.33) entry.vec2 = entry.vec entry.pos_lstms = [entry.vec, entry.vec] entry.headfov = None entry.modfov = None entry.rheadfov = None entry.rmodfov = None if not self.depFlag: #NER tagging loss 
lstm_forward = self.pos_builders[0].initial_state() lstm_backward = self.pos_builders[1].initial_state() for entry, rentry in zip(conll_sentence, reversed(conll_sentence)): lstm_forward = lstm_forward.add_input(entry.vec) lstm_backward = lstm_backward.add_input(rentry.vec) entry.pos_lstms[1] = lstm_forward.output() rentry.pos_lstms[0] = lstm_backward.output() for entry in conll_sentence: entry.pos_vec = concatenate(entry.pos_lstms) blstm_forward = self.pos_bbuilders[0].initial_state() blstm_backward = self.pos_bbuilders[1].initial_state() for entry, rentry in zip(conll_sentence, reversed(conll_sentence)): blstm_forward = blstm_forward.add_input(entry.pos_vec) blstm_backward = blstm_backward.add_input( rentry.pos_vec) entry.pos_lstms[1] = blstm_forward.output() rentry.pos_lstms[0] = blstm_backward.output() concat_layer = [ dynet.dropout(concatenate(entry.pos_lstms), 0.33) for entry in conll_sentence ] cap_info_sentence = [ self.caps_lookup[entry.capInfo] for entry in conll_sentence ] outputFFlayer = self.ffSeqPredictor.predict_sequence( concat_layer) posIDs = [ self.pos.get(entry.pos) for entry in conll_sentence ] posErrs = (self.forward_score(outputFFlayer) - self.pick_gold_score(outputFFlayer, posIDs)) ##dependency Flag if self.depFlag: # Add predicted ner tags #for entry, poses in zip(conll_sentence, outputFFlayer): # entry.vec = concatenate([entry.vec, dynet.dropout(self.plookup[np.argmax(poses.value())], 0.33)]) for entry in conll_sentence: entry.lstms = [entry.vec, entry.vec] #Parsing losses if self.blstmFlag: lstm_forward = self.builders[0].initial_state() lstm_backward = self.builders[1].initial_state() for entry, rentry in zip(conll_sentence, reversed(conll_sentence)): lstm_forward = lstm_forward.add_input(entry.vec) lstm_backward = lstm_backward.add_input(rentry.vec) entry.lstms[1] = lstm_forward.output() rentry.lstms[0] = lstm_backward.output() if self.bibiFlag: for entry in conll_sentence: entry.vec = concatenate(entry.lstms) blstm_forward = self.bbuilders[0].initial_state() blstm_backward = self.bbuilders[1].initial_state() for entry, rentry in zip(conll_sentence, reversed(conll_sentence)): blstm_forward = blstm_forward.add_input( entry.vec) blstm_backward = blstm_backward.add_input( rentry.vec) entry.lstms[1] = blstm_forward.output() rentry.lstms[0] = blstm_backward.output() scores, exprs = self.__evaluate(conll_sentence) gold = [entry.parent_id for entry in conll_sentence] heads = decoder.parse_proj( scores, gold if self.costaugFlag else None) if self.labelsFlag: concat_layer = [ dynet.dropout( self.__getRelVector(conll_sentence, head, modifier + 1), 0.33) for modifier, head in enumerate(gold[1:]) ] outputFFlayer = self.ffRelPredictor.predict_sequence( concat_layer) if dep_epoch == 1: relIDs = [ self.rels[conll_sentence[modifier + 1].relation] for modifier, _ in enumerate(gold[1:]) ] for pred, goldid in zip(outputFFlayer, relIDs): lerrs.append(self.pick_neg_log(pred, goldid)) if dep_epoch == 1: e = sum( [1 for h, g in zip(heads[1:], gold[1:]) if h != g]) if self.sNerFlag and ner_epoch == 1: conll_sentence[0].vec = concatenate([ conll_sentence[0].vec2, self.rellookup[self.rels["rroot"]] ]) for entry, pred in zip(conll_sentence[1:], outputFFlayer): relvec = self.rellookup[np.argmax(pred.value())] entry.vec = concatenate( [entry.vec2, dynet.dropout(relvec, 0.33)]) for entry in conll_sentence: entry.ner2_lstms = [entry.vec, entry.vec] slstm_forward = self.sner_builders[0].initial_state() slstm_backward = self.sner_builders[1].initial_state() for entry, rentry in 
zip(conll_sentence, reversed(conll_sentence)): lstm_forward = slstm_forward.add_input(entry.vec) lstm_backward = slstm_backward.add_input(rentry.vec) entry.ner2_lstms[1] = lstm_forward.output() rentry.ner2_lstms[0] = lstm_backward.output() for entry in conll_sentence: entry.ner2_vec = concatenate(entry.ner2_lstms) sblstm_forward = self.sner_bbuilders[0].initial_state() sblstm_backward = self.sner_bbuilders[1].initial_state() for entry, rentry in zip(conll_sentence, reversed(conll_sentence)): blstm_forward = sblstm_forward.add_input(entry.ner2_vec) blstm_backward = sblstm_backward.add_input(rentry.ner2_vec) entry.ner2_lstms[1] = blstm_forward.output() rentry.ner2_lstms[0] = blstm_backward.output() concat_layer = [dynet.dropout(concatenate(entry.ner2_lstms), 0.33) for entry in conll_sentence] outputFFlayer = self.ffSeqPredictor.predict_sequence(concat_layer) posIDs = [self.pos.get(entry.pos) for entry in conll_sentence] gold_score = self.pick_gold_score(outputFFlayer, posIDs) ner2Errs = (self.forward_score(outputFFlayer) - gold_score) if iSentence < 5: print("ner and dep loss") if ner2Errs != 0: print(ner2Errs.value()) else: print(0) if dep_epoch != 0: print(esum(lerrs).value()) else: print(0) eerrors += e if e > 0: loss = [(exprs[h][i] - exprs[g][i]) for i, (h, g) in enumerate(zip(heads, gold)) if h != g] # * (1.0/float(e)) eloss += (e) mloss += (e) errs.extend(loss) etotal += len(conll_sentence) if iSentence % 1 == 0: if len(errs) > 0 or len(lerrs) > 0 or posErrs > 0 or len(postrErrs) > 0 or ner2Errs > 0 or len(nertr2Errs) > 0: eerrs = 0 if len(errs + lerrs + postrErrs + nertr2Errs) > 0: eerrs = esum(errs + lerrs + postrErrs + nertr2Errs) eerrs += (posErrs + ner2Errs) #print(eerrs.value()) eerrs.scalar_value() eerrs.backward() self.trainer.update() errs = [] e = 0 lerrs = [] postrErrs = [] nertr2Errs = [] posErrs = 0 ner2Errs = 0 renew_cg() print("Loss: %.4f" % (mloss / iSentence))
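# NOTE: forward_score and pick_gold_score are defined elsewhere in this model;
# the NER loss above (forward_score - pick_gold_score) has the shape of a
# globally normalised, CRF-style sequence objective: log Z(x) minus the score
# of the gold tag path. A minimal, self-contained numeric sketch of that
# computation (the emission/transition names below are illustrative only,
# not this module's API):
import math

def log_sum_exp(xs):
    m = max(xs)
    return m + math.log(sum(math.exp(x - m) for x in xs))

def forward_log_z(emissions, trans):
    # emissions[k][t]: score of tag t at position k; trans[i][j]: score of tag i -> tag j
    alpha = list(emissions[0])
    for emit in emissions[1:]:
        alpha = [log_sum_exp([a + trans[i][j] for i, a in enumerate(alpha)]) + emit[j]
                 for j in range(len(emit))]
    return log_sum_exp(alpha)

def gold_path_score(emissions, trans, tags):
    score = emissions[0][tags[0]]
    for k in range(1, len(tags)):
        score += trans[tags[k - 1]][tags[k]] + emissions[k][tags[k]]
    return score

# loss = forward_log_z(E, T) - gold_path_score(E, T, gold_tags) is always >= 0
# and reaches zero only when all probability mass sits on the gold path.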
def __init__(self, vocab, pos, rels, morphs, w2i, c2i, options): self.model = ParameterCollection() random.seed(1) self.trainer = AdamTrainer(self.model) #self.trainer = SimpleSGDTrainer(self.model) self.activations = {'tanh': tanh, 'sigmoid': logistic, 'relu': rectify, 'tanh3': (lambda x: tanh(cwise_multiply(cwise_multiply(x, x), x)))} self.activation = self.activations[options.activation] self.blstmFlag = options.blstmFlag self.labelsFlag = options.labelsFlag self.costaugFlag = options.costaugFlag self.rnn_type = options.rnn_type self.pos_ldims = options.pos_lstm_dims self.dep_ldims = options.dep_lstm_dims self.wdims = options.wembedding_dims self.cdims = options.cembedding_dims self.mdims = options.membedding_dims self.pdims = options.pembedding_dims self.pos_layer = options.pos_layer self.dep_layer = options.dep_layer self.pos_drop_rate = options.pos_dropout self.dep_drop_rate = options.dep_dropout self.gold_pos = options.gold_pos self.wordsCount = vocab self.vocab = {word: ind+3 for word, ind in w2i.iteritems()} self.pos = {word: ind for ind, word in enumerate(pos)} self.id2pos = {ind: word for ind, word in enumerate(pos)} self.morphs = {feats : ind for ind, feats in enumerate(morphs)} # self.id2morph = list(morphs) self.c2i = c2i self.rels = {word: ind for ind, word in enumerate(rels)} self.irels = rels self.external_embedding, self.edim = None, 0 if options.external_embedding is not None: external_embedding_fp = open(options.external_embedding,'r') external_embedding_fp.readline() self.external_embedding = {line.split(' ')[0] : [float(f) for f in line.strip().split(' ')[1:]] for line in external_embedding_fp} external_embedding_fp.close() self.edim = len(self.external_embedding.values()[0]) self.noextrn = [0.0 for _ in xrange(self.edim)] self.extrnd = {word: i + 3 for i, word in enumerate(self.external_embedding)} self.elookup = self.model.add_lookup_parameters((len(self.external_embedding) + 3, self.edim)) for word, i in self.extrnd.iteritems(): self.elookup.init_row(i, self.external_embedding[word]) self.extrnd['*PAD*'] = 1 self.extrnd['*INITIAL*'] = 2 print 'Load external embedding. Vector dimensions', self.edim
if self.rnn_type == 'LSTM': # self.pos_builder = [LSTMBuilder(self.pos_layer, self.wdims + self.edim + self.cdims * 2, self.pos_ldims, self.model), # LSTMBuilder(self.pos_layer, self.wdims + self.edim + self.cdims * 2, self.pos_ldims, self.model)] # self.dep_builders = [LSTMBuilder(self.dep_layer, self.pos_ldims * 2 + self.pdims, self.dep_ldims, self.model), # LSTMBuilder(self.dep_layer, self.pos_ldims * 2 + self.pdims, self.dep_ldims, self.model)] # self.char_rnn = RNNSequencePredictor(LSTMBuilder(1, self.cdims, self.cdims, self.model)) self.pos_builder = [VanillaLSTMBuilder(self.pos_layer, self.wdims + self.edim + self.cdims * 2, self.pos_ldims, self.model), VanillaLSTMBuilder(self.pos_layer, self.wdims + self.edim + self.cdims * 2, self.pos_ldims, self.model)] self.dep_builders = [VanillaLSTMBuilder(self.dep_layer, self.pos_ldims * 2 + self.pdims, self.dep_ldims, self.model), VanillaLSTMBuilder(self.dep_layer, self.pos_ldims * 2 + self.pdims, self.dep_ldims, self.model)] self.char_rnn = RNNSequencePredictor(VanillaLSTMBuilder(1, self.cdims, self.cdims, self.model)) else: self.pos_builder = [GRUBuilder(self.pos_layer, self.wdims + self.edim + self.cdims * 2, self.pos_ldims, self.model), GRUBuilder(self.pos_layer, self.wdims + self.edim + self.cdims * 2, self.pos_ldims, self.model)] self.dep_builders = [GRUBuilder(self.dep_layer, self.pos_ldims * 2 + self.pdims, self.dep_ldims, self.model), GRUBuilder(self.dep_layer, self.pos_ldims * 2 + self.pdims, self.dep_ldims, self.model)] self.char_rnn = RNNSequencePredictor(GRUBuilder(1, self.cdims, self.cdims, self.model)) self.ffSeqPredictor = FFSequencePredictor(Layer(self.model, self.pos_ldims * 2, len(self.pos), softmax)) self.arc_hid = options.arc_hidden self.rel_hid = options.rel_hidden self.hidden2_units = options.hidden2_units self.vocab['*PAD*'] = 1 self.vocab['*INITIAL*'] = 2 self.wlookup = self.model.add_lookup_parameters((len(vocab) + 3, self.wdims)) # Load pretrained if options.pretrain_wembed is not None: print('Loading pretrained word embedding...') with open(options.pretrain_wembed, 'r') as emb_f: next(emb_f) self.pretrained_wembed = {line.split(' ')[0] : [float(f) for f in line.strip().split(' ')[1:]] for line in emb_f} for word in self.pretrained_wembed.keys(): if word in self.vocab: self.wlookup.init_row(self.vocab[word], self.pretrained_wembed[word]) self.clookup = self.model.add_lookup_parameters((len(c2i), self.cdims)) self.mlookup = self.model.add_lookup_parameters((len(morphs), self.mdims)) self.plookup = self.model.add_lookup_parameters((len(pos), self.pdims)) self.hidLayerFOH = self.model.add_parameters((self.arc_hid, self.dep_ldims * 2)) self.hidLayerFOM = self.model.add_parameters((self.arc_hid, self.dep_ldims * 2)) self.hidBias = self.model.add_parameters((self.arc_hid)) self.hid2Layer = self.model.add_parameters((self.hidden2_units, self.arc_hid)) self.hid2Bias = self.model.add_parameters((self.hidden2_units)) self.outLayer = self.model.add_parameters((1, self.hidden2_units if self.hidden2_units > 0 else self.arc_hid)) if self.labelsFlag: self.rhidLayerFOH = self.model.add_parameters((self.rel_hid, 2 * self.dep_ldims)) self.rhidLayerFOM = self.model.add_parameters((self.rel_hid, 2 * self.dep_ldims)) self.rhidBias = self.model.add_parameters((self.rel_hid)) self.rhid2Layer = self.model.add_parameters((self.hidden2_units, self.rel_hid)) self.rhid2Bias = self.model.add_parameters((self.hidden2_units)) self.routLayer = self.model.add_parameters((len(self.irels), 
self.hidden2_units if self.hidden2_units > 0 else self.rel_hid)) self.routBias = self.model.add_parameters((len(self.irels))) self.charSeqPredictor = FFSequencePredictor(Layer(self.model, self.cdims*2, len(self.morphs), softmax))
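# NOTE: the constructor above stacks two encoders: a tagging BiLSTM over
# word+char features (input wdims + edim + 2*cdims) whose concatenated states,
# plus a POS embedding, feed the parsing BiLSTM (input 2*pos_ldims + pdims).
# A free-standing sketch of that wiring with toy sizes (all names below are
# illustrative, not this module's API):
import dynet as dy

pc = dy.ParameterCollection()
IN_DIM, POS_H, P_DIM, DEP_H = 10, 8, 4, 6
pos_fwd = dy.VanillaLSTMBuilder(1, IN_DIM, POS_H, pc)
pos_bwd = dy.VanillaLSTMBuilder(1, IN_DIM, POS_H, pc)
dep_fwd = dy.VanillaLSTMBuilder(1, 2 * POS_H + P_DIM, DEP_H, pc)

feats = [dy.inputVector([0.1] * IN_DIM) for _ in range(5)]    # word+char features
pos_emb = [dy.inputVector([0.0] * P_DIM) for _ in range(5)]   # POS embeddings

f_states = pos_fwd.initial_state().transduce(feats)
b_states = list(reversed(pos_bwd.initial_state().transduce(list(reversed(feats)))))
dep_in = [dy.concatenate([f, b, p]) for f, b, p in zip(f_states, b_states, pos_emb)]
dep_states = dep_fwd.initial_state().transduce(dep_in)  # only one direction shown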
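# NOTE: the attention parameters set up in the class below (attention_w1,
# attention_w2, attention_v and their *_context twins) implement standard
# additive/MLP attention: score_i = v . tanh(W1*h_i + W2*s). A minimal sketch
# of that score computation with toy dimensions (names here are illustrative):
import dynet as dy

pc = dy.ParameterCollection()
ENC, DEC, ATT = 8, 8, 5
p_w1 = pc.add_parameters((ATT, ENC))
p_w2 = pc.add_parameters((ATT, DEC))
p_v = pc.add_parameters((1, ATT))

H = dy.concatenate_cols([dy.inputVector([0.1] * ENC) for _ in range(4)])  # encoder states as columns
s = dy.inputVector([0.2] * DEC)                                           # current decoder state
w1, w2, v = dy.parameter(p_w1), dy.parameter(p_w2), dy.parameter(p_v)
scores = dy.transpose(v * dy.tanh(dy.colwise_add(w1 * H, w2 * s)))  # one score per position
weights = dy.softmax(scores)
context = H * weights  # weighted sum of encoder states, fed back to the decoder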
class jPosDepLearner: def __init__(self, vocab, pos, rels, w2i, c2i, m2i, t2i, morph_dict, options): self.model = ParameterCollection() random.seed(1) self.trainer = AdamTrainer(self.model) #if options.learning_rate is not None: # self.trainer = AdamTrainer(self.model, alpha=options.learning_rate) # print("Adam initial learning rate:", options.learning_rate) self.activations = {'tanh': tanh, 'sigmoid': logistic, 'relu': rectify, 'tanh3': (lambda x: tanh(cwise_multiply(cwise_multiply(x, x), x)))} self.activation = self.activations[options.activation] self.blstmFlag = options.blstmFlag self.labelsFlag = options.labelsFlag self.costaugFlag = options.costaugFlag self.bibiFlag = options.bibiFlag self.morphFlag = options.morphFlag self.goldMorphFlag = options.goldMorphFlag self.morphTagFlag = options.morphTagFlag self.goldMorphTagFlag = options.goldMorphTagFlag self.lowerCase = options.lowerCase self.mtag_encoding_composition_type = options.mtag_encoding_composition_type self.mtag_encoding_composition_alpha = options.mtag_encoding_composition_alpha self.ldims = options.lstm_dims self.wdims = options.wembedding_dims self.mdims = options.membedding_dims self.tdims = options.tembedding_dims self.cdims = options.cembedding_dims self.layers = options.lstm_layers self.wordsCount = vocab self.vocab = {word: ind + 3 for word, ind in iter(w2i.items())} self.pos = {word: ind for ind, word in enumerate(pos)} self.id2pos = {ind: word for ind, word in enumerate(pos)} self.c2i = c2i self.m2i = m2i self.t2i = t2i self.i2t = {t2i[i]:i for i in self.t2i} self.morph_dict = morph_dict self.rels = {word: ind for ind, word in enumerate(rels)} self.irels = rels self.pdims = options.pembedding_dims self.tagging_attention_size = options.tagging_att_size self.vocab['*PAD*'] = 1 self.vocab['*INITIAL*'] = 2 self.wlookup = self.model.add_lookup_parameters((len(vocab) + 3, self.wdims)) self.clookup = self.model.add_lookup_parameters((len(c2i), self.cdims)) self.plookup = self.model.add_lookup_parameters((len(pos), self.pdims)) self.ext_embeddings = None if options.external_embedding is not None: ext_embeddings, ext_emb_dim = load_embeddings_file(options.external_embedding, lower=self.lowerCase, type=options.external_embedding_type) assert (ext_emb_dim == self.wdims) print("Initializing word embeddings by pre-trained vectors") count = 0 for word in self.vocab: if word in ext_embeddings: count += 1 self.wlookup.init_row(self.vocab[word], ext_embeddings[word]) self.ext_embeddings = ext_embeddings print("Vocab size: %d; #words having pretrained vectors: %d" % (len(self.vocab), count)) self.morph_dims = 2*2*self.mdims if self.morphFlag else 0 self.mtag_dims = 2*self.tdims if self.morphTagFlag else 0 self.pos_builders = [VanillaLSTMBuilder(1, self.wdims + self.cdims * 2 + self.morph_dims + self.mtag_dims, self.ldims, self.model), VanillaLSTMBuilder(1, self.wdims + self.cdims * 2 + self.morph_dims + self.mtag_dims, self.ldims, self.model)] self.pos_bbuilders = [VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model), VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model)] if self.bibiFlag: self.builders = [VanillaLSTMBuilder(1, self.wdims + self.cdims * 2 + self.morph_dims + self.mtag_dims + self.pdims, self.ldims, self.model), VanillaLSTMBuilder(1, self.wdims + self.cdims * 2 + self.morph_dims + self.mtag_dims + self.pdims, self.ldims, self.model)] self.bbuilders = [VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model), VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model)] elif self.layers > 0: 
self.builders = [VanillaLSTMBuilder(self.layers, self.wdims + self.cdims * 2 + self.morph_dims + self.mtag_dims + self.pdims, self.ldims, self.model), VanillaLSTMBuilder(self.layers, self.wdims + self.cdims * 2 + self.morph_dims + self.mtag_dims + self.pdims, self.ldims, self.model)] else: self.builders = [SimpleRNNBuilder(1, self.wdims + self.cdims * 2 + self.morph_dims + self.mtag_dims, self.ldims, self.model), SimpleRNNBuilder(1, self.wdims + self.cdims * 2 + self.morph_dims + self.mtag_dims, self.ldims, self.model)] self.ffSeqPredictor = FFSequencePredictor(Layer(self.model, self.ldims * 2, len(self.pos), softmax)) self.hidden_units = options.hidden_units self.hidBias = self.model.add_parameters((self.ldims * 8)) self.hidLayer = self.model.add_parameters((self.hidden_units, self.ldims * 8)) self.hid2Bias = self.model.add_parameters((self.hidden_units)) self.outLayer = self.model.add_parameters((1, self.hidden_units if self.hidden_units > 0 else self.ldims * 8)) if self.labelsFlag: self.rhidBias = self.model.add_parameters((self.ldims * 8)) self.rhidLayer = self.model.add_parameters((self.hidden_units, self.ldims * 8)) self.rhid2Bias = self.model.add_parameters((self.hidden_units)) self.routLayer = self.model.add_parameters( (len(self.irels), self.hidden_units if self.hidden_units > 0 else self.ldims * 8)) self.routBias = self.model.add_parameters((len(self.irels))) self.ffRelPredictor = FFSequencePredictor( Layer(self.model, self.hidden_units if self.hidden_units > 0 else self.ldims * 8, len(self.irels), softmax)) self.char_rnn = RNNSequencePredictor(LSTMBuilder(1, self.cdims, self.cdims, self.model)) if self.morphFlag: self.seg_lstm = [VanillaLSTMBuilder(1, self.cdims, self.cdims, self.model), VanillaLSTMBuilder(1, self.cdims, self.cdims, self.model)] self.seg_hidLayer = self.model.add_parameters((1, self.cdims*2)) self.slookup = self.model.add_lookup_parameters((len(self.c2i), self.cdims)) self.char_lstm = [VanillaLSTMBuilder(1, self.cdims, self.mdims, self.model), VanillaLSTMBuilder(1, self.cdims, self.mdims, self.model)] self.char_hidLayer = self.model.add_parameters((self.mdims, self.mdims*2)) self.mclookup = self.model.add_lookup_parameters((len(self.c2i), self.cdims)) self.morph_lstm = [VanillaLSTMBuilder(1, self.mdims*2, self.wdims, self.model), VanillaLSTMBuilder(1, self.mdims*2, self.wdims, self.model)] self.morph_hidLayer = self.model.add_parameters((self.wdims, self.wdims*2)) self.mlookup = self.model.add_lookup_parameters((len(m2i), self.mdims)) self.morph_rnn = RNNSequencePredictor(LSTMBuilder(1, self.mdims*2, self.mdims*2, self.model)) if self.morphTagFlag: # All weights for morpheme tagging will be here.
# Decoder self.dec_lstm = VanillaLSTMBuilder(1, 2 * self.cdims + self.tdims + self.cdims * 2, self.cdims, self.model) # Attention self.attention_w1 = self.model.add_parameters((self.tagging_attention_size, self.cdims * 2)) self.attention_w2 = self.model.add_parameters((self.tagging_attention_size, self.cdims * 2)) self.attention_v = self.model.add_parameters((1, self.tagging_attention_size)) # Attention Context self.attention_w1_context = self.model.add_parameters((self.tagging_attention_size, self.cdims * 2)) self.attention_w2_context = self.model.add_parameters((self.tagging_attention_size, self.cdims * 2)) self.attention_v_context = self.model.add_parameters((1, self.tagging_attention_size)) # MLP - Softmax self.decoder_w = self.model.add_parameters((len(t2i), self.cdims)) self.decoder_b = self.model.add_parameters((len(t2i))) self.mtag_rnn = RNNSequencePredictor(VanillaLSTMBuilder(1, self.tdims, self.tdims, self.model)) self.tlookup = self.model.add_lookup_parameters((len(t2i), self.tdims)) if self.mtag_encoding_composition_type != "None": self.mtag_encoding_f_w = self.model.add_parameters((2 * self.tdims, 4 * self.tdims)) self.mtag_encoding_f_b = self.model.add_parameters((2 * self.tdims)) self.mtag_encoding_b_w = self.model.add_parameters((2 * self.tdims, 4 * self.tdims)) self.mtag_encoding_b_b = self.model.add_parameters((2 * self.tdims)) def initialize(self): if self.morphFlag and self.ext_embeddings: print("Initializing word embeddings by morph2vec") count = 0 for word in self.vocab: if word not in self.ext_embeddings and word in self.morph_dict: morph_seg = self.morph_dict[word] count += 1 self.wlookup.init_row(self.vocab[word], self.__getWordVector(morph_seg).vec_value()) print("Vocab size: %d; #missing words having generated vectors: %d" % (len(self.vocab), count)) renew_cg() def __getExpr(self, sentence, i, j): if sentence[i].headfov is None: sentence[i].headfov = concatenate([sentence[i].lstms[0], sentence[i].lstms[1]]) if sentence[j].modfov is None: sentence[j].modfov = concatenate([sentence[j].lstms[0], sentence[j].lstms[1]]) _inputVector = concatenate( [sentence[i].headfov, sentence[j].modfov, dynet.abs(sentence[i].headfov - sentence[j].modfov), dynet.cmult(sentence[i].headfov, sentence[j].modfov)]) if self.hidden_units > 0: output = self.outLayer.expr() * self.activation( self.hid2Bias.expr() + self.hidLayer.expr() * self.activation( _inputVector + self.hidBias.expr())) else: output = self.outLayer.expr() * self.activation(_inputVector + self.hidBias.expr()) return output def __evaluate(self, sentence): exprs = [[self.__getExpr(sentence, i, j) for j in range(len(sentence))] for i in range(len(sentence))] scores = np.array([[output.scalar_value() for output in exprsRow] for exprsRow in exprs]) return scores, exprs def pick_neg_log(self, pred, gold): return -dynet.log(dynet.pick(pred, gold)) def binary_crossentropy(self, pred, gold): return dynet.binary_log_loss(pred, gold) def cosine_proximity(self, pred, gold): def l2_normalize(x): square_sum = dynet.sqrt(dynet.bmax(dynet.sum_elems(dynet.square(x)), np.finfo(float).eps * dynet.ones((1))[0])) return dynet.cdiv(x, square_sum) y_true = l2_normalize(pred) y_pred = l2_normalize(gold) return -dynet.sum_elems(dynet.cmult(y_true, y_pred)) def __getRelVector(self, sentence, i, j): if sentence[i].rheadfov is None: sentence[i].rheadfov = concatenate([sentence[i].lstms[0], sentence[i].lstms[1]]) if sentence[j].rmodfov is None: sentence[j].rmodfov = concatenate([sentence[j].lstms[0], sentence[j].lstms[1]]) _outputVector = 
concatenate( [sentence[i].rheadfov, sentence[j].rmodfov, abs(sentence[i].rheadfov - sentence[j].rmodfov), cmult(sentence[i].rheadfov, sentence[j].rmodfov)]) if self.hidden_units > 0: return self.rhid2Bias.expr() + self.rhidLayer.expr() * self.activation( _outputVector + self.rhidBias.expr()) else: return _outputVector def __getSegmentationVector(self, word): slstm_forward = self.seg_lstm[0].initial_state() slstm_backward = self.seg_lstm[1].initial_state() seg_lstm_forward = slstm_forward.transduce([self.slookup[self.c2i[char] if char in self.c2i else 0] for char in word]) seg_lstm_backward = slstm_backward.transduce([self.slookup[self.c2i[char] if char in self.c2i else 0] for char in reversed(word)]) seg_vec = [] for seg, rev_seg in zip(seg_lstm_forward,reversed(seg_lstm_backward)): seg_vec.append(dynet.logistic(self.seg_hidLayer.expr() * concatenate([seg,rev_seg]))) seg_vec = concatenate(seg_vec) return seg_vec def __getMorphVector(self, morph): clstm_forward = self.char_lstm[0].initial_state() clstm_backward = self.char_lstm[1].initial_state() char_lstm_forward = clstm_forward.transduce([self.mclookup[self.c2i[char] if char in self.c2i else 0] for char in morph] if len(morph) > 0 else [self.mclookup[0]])[-1] char_lstm_backward = clstm_backward.transduce([self.mclookup[self.c2i[char] if char in self.c2i else 0] for char in reversed(morph)] if len(morph) > 0 else [self.mclookup[0]])[-1] char_emb = self.char_hidLayer.expr() * concatenate([char_lstm_forward,char_lstm_backward]) return concatenate([self.mlookup[self.m2i[morph] if morph in self.m2i else 0], char_emb]) def __getWordVector(self, morph_seg): mlstm_forward = self.morph_lstm[0].initial_state() mlstm_backward = self.morph_lstm[1].initial_state() morph_lstm_forward = mlstm_forward.transduce([self.__getMorphVector(morph) for morph in morph_seg])[-1] morph_lstm_backward = mlstm_backward.transduce([self.__getMorphVector(morph) for morph in reversed(morph_seg)])[-1] morph_enc = concatenate([morph_lstm_forward, morph_lstm_backward]) word_vec = self.morph_hidLayer.expr() * morph_enc return word_vec def attend(self, input_mat, state, w1dt): w2 = parameter(self.attention_w2) v = parameter(self.attention_v) # input_mat: (encoder_state x seqlen) => input vecs concatenated as cols # w1dt: (attdim x seqlen) # w2dt: (attdim,1) w2dt = w2 * concatenate(list(state.s())) # att_weights: (seqlen,) row vector # unnormalized: (seqlen,) unnormalized = transpose(v * tanh(colwise_add(w1dt, w2dt))) att_weights = softmax(unnormalized) # context: (encoder_state) context = input_mat * att_weights return context def attend_context(self, input_mat, state, w1dt_context): w2_context = parameter(self.attention_w2_context) v_context = parameter(self.attention_v_context) # input_mat: (encoder_state x seqlen) => input vecs concatenated as cols # w1dt: (attdim x seqlen) # w2dt: (attdim,1) w2dt_context = w2_context * concatenate(list(state.s())) # att_weights: (seqlen,) row vector # unnormalized: (seqlen,) unnormalized = transpose(v_context * tanh(colwise_add(w1dt_context, w2dt_context))) att_weights = softmax(unnormalized) # context: (encoder_state) context = input_mat * att_weights return context def decode(self, vectors, decoder_seq, word_context): w = parameter(self.decoder_w) b = parameter(self.decoder_b) w1 = parameter(self.attention_w1) w1_context = parameter(self.attention_w1_context) input_mat = concatenate_cols(vectors) input_context = concatenate_cols(word_context) w1dt = None w1dt_context = None last_output_embeddings = self.tlookup[self.t2i["<s>"]] s = 
self.dec_lstm.initial_state().add_input(concatenate([vecInput(self.cdims * 2), last_output_embeddings, vecInput(self.cdims * 2)])) loss = [] for char in decoder_seq: # w1dt can be computed and cached once for the entire decoding phase w1dt = w1dt or w1 * input_mat w1dt_context = w1dt_context or w1_context * input_context vector = concatenate([self.attend(input_mat, s, w1dt), last_output_embeddings, self.attend_context(input_context, s, w1dt_context)]) s = s.add_input(vector) out_vector = w * s.output() + b probs = softmax(out_vector) last_output_embeddings = self.tlookup[char] loss.append(-log(pick(probs, char))) loss = esum(loss) return loss def __getLossMorphTagging(self, all_encoded_states, decoder_gold, word_context): return self.decode(all_encoded_states, decoder_gold, word_context) def generate(self, encoded, word_context): w = parameter(self.decoder_w) b = parameter(self.decoder_b) w1 = parameter(self.attention_w1) w1_context = parameter(self.attention_w1_context) input_mat = concatenate_cols(encoded) input_context = concatenate_cols(word_context) w1dt = None w1dt_context = None last_output_embeddings = self.tlookup[self.t2i["<s>"]] s = self.dec_lstm.initial_state().add_input(concatenate([vecInput(self.cdims * 2), last_output_embeddings, vecInput(self.cdims * 2)])) out = [] count_EOS = 0 limit_features = 10 for i in range(limit_features): if count_EOS == 2: break # w1dt can be computed and cached once for the entire decoding phase w1dt = w1dt or w1 * input_mat w1dt_context = w1dt_context or w1_context * input_context vector = concatenate([self.attend(input_mat, s, w1dt), last_output_embeddings, self.attend_context(input_context, s, w1dt_context)]) s = s.add_input(vector) out_vector = w * s.output() + b probs = softmax(out_vector).vec_value() next_char = probs.index(max(probs)) last_output_embeddings = self.tlookup[next_char] if next_char == self.t2i["<s>"]: count_EOS += 1 out.append(next_char) return out def Save(self, filename): self.model.save(filename) def Load(self, filename): self.model.populate(filename) def Predict(self, conll_path): with open(conll_path, 'r') as conllFP: for iSentence, sentence in enumerate(read_conll(conllFP, self.c2i, self.m2i, self.t2i, self.morph_dict)): conll_sentence = [entry for entry in sentence if isinstance(entry, utils.ConllEntry)] if self.morphTagFlag: sentence_context = [] last_state_char = self.char_rnn.predict_sequence([self.clookup[self.c2i["<start>"]]])[-1] rev_last_state_char = self.char_rnn.predict_sequence([self.clookup[self.c2i["<start>"]]])[-1] sentence_context.append(concatenate([last_state_char, rev_last_state_char])) for entry in conll_sentence: last_state_char = self.char_rnn.predict_sequence([self.clookup[c] for c in entry.idChars]) rev_last_state_char = self.char_rnn.predict_sequence([self.clookup[c] for c in reversed(entry.idChars)]) entry.char_rnn_states = [concatenate([f,b]) for f,b in zip(last_state_char, rev_last_state_char)] sentence_context.append(entry.char_rnn_states[-1]) for idx, entry in enumerate(conll_sentence): wordvec = self.wlookup[int(self.vocab.get(entry.norm, 0))] if self.wdims > 0 else None if self.morphTagFlag: entry.vec = concatenate([wordvec, entry.char_rnn_states[-1]]) else: last_state_char = self.char_rnn.predict_sequence([self.clookup[c] for c in entry.idChars])[-1] rev_last_state_char = self.char_rnn.predict_sequence([self.clookup[c] for c in reversed(entry.idChars)])[-1] entry.vec = concatenate([wordvec, last_state_char, rev_last_state_char]) for idx, entry in enumerate(conll_sentence): if 
self.morphFlag: if len(entry.norm) > 2: if self.goldMorphFlag: seg_vec = self.__getSegmentationVector(entry.norm) seg_vec = dynet.vecInput(seg_vec.dim()[0][0]) seg_vec.set(entry.idMorphs) morph_seg = utils.generate_morphs(entry.norm, seg_vec.vec_value()) entry.pred_seg = morph_seg else: seg_vec = self.__getSegmentationVector(entry.norm) morph_seg = utils.generate_morphs(entry.norm, seg_vec.vec_value()) entry.pred_seg = seg_vec.vec_value() else: morph_seg = [entry.norm] entry.pred_seg = entry.idMorphs entry.seg = entry.idMorphs last_state_morph = self.morph_rnn.predict_sequence([self.__getMorphVector(morph) for morph in morph_seg])[-1] rev_last_state_morph = self.morph_rnn.predict_sequence([self.__getMorphVector(morph) for morph in reversed(morph_seg)])[ -1] entry.vec = concatenate([entry.vec, last_state_morph, rev_last_state_morph]) morphtag_encodings = [] for idx, entry in enumerate(conll_sentence): if self.morphTagFlag: if self.goldMorphTagFlag: morph_tags = entry.idMorphTags entry.pred_tags = entry.idMorphTags entry.pred_tags_tokens = [self.i2t[m_tag_id] for m_tag_id in entry.pred_tags] else: word_context = [c for i, c in enumerate(sentence_context) if i - 1 != idx] entry.pred_tags = self.generate(entry.char_rnn_states, word_context) morph_tags = entry.pred_tags entry.tags = entry.idMorphTags entry.pred_tags_tokens = [self.i2t[m_tag_id] for m_tag_id in entry.pred_tags] last_state_mtag = self.mtag_rnn.predict_sequence([self.tlookup[t] for t in morph_tags])[-1] rev_last_state_mtag = self.mtag_rnn.predict_sequence([self.tlookup[t] for t in reversed(morph_tags)])[-1] current_encoding_mtag = concatenate([last_state_mtag, rev_last_state_mtag]) morphtag_encodings.append(current_encoding_mtag) if self.morphTagFlag: forward = [] for idx, encoding in enumerate(morphtag_encodings): if idx == 0: forward.append(encoding) else: updated = morphtag_encodings[idx-1]*self.mtag_encoding_composition_alpha \ + encoding*(1-self.mtag_encoding_composition_alpha) forward.append(updated) if self.mtag_encoding_composition_type == "w_sum": upper_morphtag_encodings = forward elif self.mtag_encoding_composition_type == "bi_w_sum": backward = [] for idx, r_encoding in enumerate(morphtag_encodings): if idx == len(morphtag_encodings) - 1: backward.append(r_encoding) else: updated = morphtag_encodings[idx+1]*self.mtag_encoding_composition_alpha \ + r_encoding*(1-self.mtag_encoding_composition_alpha) backward.append(updated) upper_morphtag_encodings = [f+b for f,b in zip(forward, backward)] elif self.mtag_encoding_composition_type == "bi_mlp": forward = [] backward = [] for idx, encoding in enumerate(morphtag_encodings): if idx != 0: f = self.mtag_encoding_f_w * concatenate([encoding, morphtag_encodings[idx-1]]) \ + self.mtag_encoding_f_b forward.append(f) else: forward.append(encoding) if idx != len(morphtag_encodings) - 1: b = self.mtag_encoding_b_w * concatenate([encoding, morphtag_encodings[idx+1]]) \ + self.mtag_encoding_b_b backward.append(b) else: backward.append(encoding) upper_morphtag_encodings = [f+b for f,b in zip(forward, backward)] else: upper_morphtag_encodings = morphtag_encodings for entry, mtag in zip(conll_sentence, upper_morphtag_encodings): entry.vec = concatenate([entry.vec, mtag]) for idx, entry in enumerate(conll_sentence): entry.pos_lstms = [entry.vec, entry.vec] entry.headfov = None entry.modfov = None entry.rheadfov = None entry.rmodfov = None #Predicted pos tags lstm_forward = self.pos_builders[0].initial_state() lstm_backward = self.pos_builders[1].initial_state() for entry, rentry in 
zip(conll_sentence, reversed(conll_sentence)): lstm_forward = lstm_forward.add_input(entry.vec) lstm_backward = lstm_backward.add_input(rentry.vec) entry.pos_lstms[1] = lstm_forward.output() rentry.pos_lstms[0] = lstm_backward.output() for entry in conll_sentence: entry.pos_vec = concatenate(entry.pos_lstms) blstm_forward = self.pos_bbuilders[0].initial_state() blstm_backward = self.pos_bbuilders[1].initial_state() for entry, rentry in zip(conll_sentence, reversed(conll_sentence)): blstm_forward = blstm_forward.add_input(entry.pos_vec) blstm_backward = blstm_backward.add_input(rentry.pos_vec) entry.pos_lstms[1] = blstm_forward.output() rentry.pos_lstms[0] = blstm_backward.output() concat_layer = [concatenate(entry.pos_lstms) for entry in conll_sentence] outputFFlayer = self.ffSeqPredictor.predict_sequence(concat_layer) predicted_pos_indices = [np.argmax(o.value()) for o in outputFFlayer] predicted_postags = [self.id2pos[idx] for idx in predicted_pos_indices] # Add predicted pos tags for parsing prediction for entry, posid in zip(conll_sentence, predicted_pos_indices): entry.vec = concatenate([entry.vec, self.plookup[posid]]) entry.lstms = [entry.vec, entry.vec] if self.blstmFlag: lstm_forward = self.builders[0].initial_state() lstm_backward = self.builders[1].initial_state() for entry, rentry in zip(conll_sentence, reversed(conll_sentence)): lstm_forward = lstm_forward.add_input(entry.vec) lstm_backward = lstm_backward.add_input(rentry.vec) entry.lstms[1] = lstm_forward.output() rentry.lstms[0] = lstm_backward.output() if self.bibiFlag: for entry in conll_sentence: entry.vec = concatenate(entry.lstms) blstm_forward = self.bbuilders[0].initial_state() blstm_backward = self.bbuilders[1].initial_state() for entry, rentry in zip(conll_sentence, reversed(conll_sentence)): blstm_forward = blstm_forward.add_input(entry.vec) blstm_backward = blstm_backward.add_input(rentry.vec) entry.lstms[1] = blstm_forward.output() rentry.lstms[0] = blstm_backward.output() scores, exprs = self.__evaluate(conll_sentence) heads = decoder.parse_proj(scores) # Multiple roots: heading to the previous "rooted" one rootCount = 0 rootWid = -1 for index, head in enumerate(heads): if head == 0: rootCount += 1 if rootCount == 1: rootWid = index if rootCount > 1: heads[index] = rootWid rootWid = index for entry, head, pos in zip(conll_sentence, heads, predicted_postags): entry.pred_parent_id = head entry.pred_relation = '_' entry.pred_pos = pos dump = False if self.labelsFlag: concat_layer = [self.__getRelVector(conll_sentence, head, modifier + 1) for modifier, head in enumerate(heads[1:])] outputFFlayer = self.ffRelPredictor.predict_sequence(concat_layer) predicted_rel_indices = [np.argmax(o.value()) for o in outputFFlayer] predicted_rels = [self.irels[idx] for idx in predicted_rel_indices] for modifier, head in enumerate(heads[1:]): conll_sentence[modifier + 1].pred_relation = predicted_rels[modifier] renew_cg() if not dump: yield sentence def morph2word(self, morph_dict): word_emb = {} for word in morph_dict.keys(): morph_seg = morph_dict[word] word_vec = self.__getWordVector(morph_seg) word_emb[word] = word_vec.vec_value() renew_cg() return word_emb def morph(self): morph_dict = {} for morph in self.m2i.keys(): morph_dict[morph] = self.__getMorphVector(morph).vec_value() renew_cg() return morph_dict def Train_Morph(self): self.trainer.set_sparse_updates(False) start = time.time() for iWord, word in enumerate(list(self.morph_dict.keys())): if iWord % 2000 == 0 and iWord != 0: print("Processing word number: %d" % iWord, 
", Time: %.2f" % (time.time() - start)) start = time.time() morph_seg = self.morph_dict[word] morph_vec = self.__getWordVector(morph_seg) if self.ext_embeddings is None: vec_gold = self.wlookup[int(self.vocab.get(word, 0))].vec_value() elif word in self.ext_embeddings: vec_gold = self.ext_embeddings[word] else: vec_gold = None if vec_gold is not None: y_gold = dynet.vecInput(self.wdims) y_gold.set(vec_gold) mErrs = self.cosine_proximity(morph_vec, y_gold) mErrs.backward() self.trainer.update() renew_cg() def embed_word(self, word): return [self.input_lookup[char] for char in word] def run_lstm(self, init_state, input_vecs): s = init_state out_vectors = [] for vector in input_vecs: s = s.add_input(vector) out_vector = s.output() out_vectors.append(out_vector) return out_vectors def encode_word(self, word): word_rev = list(reversed(word)) fwd_vectors = self.run_lstm(self.enc_fwd_lstm.initial_state(), word) bwd_vectors = self.run_lstm(self.enc_bwd_lstm.initial_state(), word_rev) bwd_vectors = list(reversed(bwd_vectors)) vectors = [concatenate(list(p)) for p in zip(fwd_vectors, bwd_vectors)] return vectors def Train(self, conll_path): self.trainer.set_sparse_updates(True) eloss = 0.0 mloss = 0.0 eerrors = 0 etotal = 0 start = time.time() with open(conll_path, 'r') as conllFP: shuffledData = list(read_conll(conllFP, self.c2i, self.m2i, self.t2i, self.morph_dict)) random.shuffle(shuffledData) errs = [] lerrs = [] posErrs = [] segErrs = [] mTagErrs = [] for iSentence, sentence in enumerate(shuffledData): if iSentence % 500 == 0 and iSentence != 0: print("Processing sentence number: %d" % iSentence, ", Loss: %.4f" % ( eloss / etotal), ", Time: %.2f" % (time.time() - start)) start = time.time() eerrors = 0 eloss = 0.0 etotal = 0 conll_sentence = [entry for entry in sentence if isinstance(entry, utils.ConllEntry)] if self.morphTagFlag: sentence_context = [] last_state_char = self.char_rnn.predict_sequence([self.clookup[self.c2i["<start>"]]])[-1] rev_last_state_char = self.char_rnn.predict_sequence([self.clookup[self.c2i["<start>"]]])[-1] sentence_context.append(concatenate([last_state_char, rev_last_state_char])) for entry in conll_sentence: last_state_char = self.char_rnn.predict_sequence([self.clookup[c] for c in entry.idChars]) rev_last_state_char = self.char_rnn.predict_sequence([self.clookup[c] for c in reversed(entry.idChars)]) entry.char_rnn_states = [concatenate([f,b]) for f,b in zip(last_state_char, rev_last_state_char)] sentence_context.append(entry.char_rnn_states[-1]) for idx, entry in enumerate(conll_sentence): c = float(self.wordsCount.get(entry.norm, 0)) dropFlag = (random.random() < (c / (0.25 + c))) wordvec = self.wlookup[ int(self.vocab.get(entry.norm, 0)) if dropFlag else 0] if self.wdims > 0 else None if self.morphTagFlag : entry.vec = dynet.dropout(concatenate([wordvec, entry.char_rnn_states[-1]]), 0.33) else: last_state_char = self.char_rnn.predict_sequence([self.clookup[c] for c in entry.idChars])[-1] rev_last_state_char = self.char_rnn.predict_sequence([self.clookup[c] for c in reversed(entry.idChars)])[-1] entry.vec = dynet.dropout(concatenate([wordvec, last_state_char, rev_last_state_char]), 0.33) for idx, entry in enumerate(conll_sentence): if self.morphFlag: if len(entry.norm) > 2: if self.goldMorphFlag: seg_vec = self.__getSegmentationVector(entry.norm) seg_vec = dynet.vecInput(seg_vec.dim()[0][0]) seg_vec.set(entry.idMorphs) morph_seg = utils.generate_morphs(entry.norm, seg_vec.vec_value()) else: seg_vec = self.__getSegmentationVector(entry.norm) morph_seg = 
utils.generate_morphs(entry.norm, seg_vec.vec_value()) vec_gold = dynet.vecInput(seg_vec.dim()[0][0]) vec_gold.set(entry.idMorphs) segErrs.append(self.binary_crossentropy(seg_vec,vec_gold)) else: morph_seg = [entry.norm] last_state_morph = self.morph_rnn.predict_sequence([self.__getMorphVector(morph) for morph in morph_seg])[-1] rev_last_state_morph = self.morph_rnn.predict_sequence([self.__getMorphVector(morph) for morph in reversed(morph_seg)])[ -1] encoding_morph = concatenate([last_state_morph, rev_last_state_morph]) entry.vec = concatenate([entry.vec, dynet.dropout(encoding_morph, 0.33)]) morphtag_encodings = [] for idx, entry in enumerate(conll_sentence): if self.morphTagFlag: if self.goldMorphTagFlag: morph_tags = entry.idMorphTags else: word_context = [c for i, c in enumerate(sentence_context) if i-1 != idx] mTagErrs.append( self.__getLossMorphTagging(entry.char_rnn_states, entry.idMorphTags, word_context)) predicted_sequence = self.generate(entry.char_rnn_states, word_context) morph_tags = predicted_sequence last_state_mtag = self.mtag_rnn.predict_sequence([self.tlookup[t] for t in morph_tags])[-1] rev_last_state_mtag = \ self.mtag_rnn.predict_sequence([self.tlookup[t] for t in reversed(morph_tags)])[ -1] current_encoding_mtag = concatenate([last_state_mtag, rev_last_state_mtag]) morphtag_encodings.append(current_encoding_mtag) if self.morphTagFlag: forward = [] for idx, encoding in enumerate(morphtag_encodings): if idx == 0: forward.append(encoding) else: updated = morphtag_encodings[idx-1]*self.mtag_encoding_composition_alpha \ + encoding*(1-self.mtag_encoding_composition_alpha) forward.append(updated) if self.mtag_encoding_composition_type == "w_sum": upper_morphtag_encodings = forward elif self.mtag_encoding_composition_type == "bi_w_sum": backward = [] for idx, r_encoding in enumerate(morphtag_encodings): if idx == len(morphtag_encodings) - 1: backward.append(r_encoding) else: updated = morphtag_encodings[idx+1]*self.mtag_encoding_composition_alpha \ + r_encoding*(1-self.mtag_encoding_composition_alpha) backward.append(updated) upper_morphtag_encodings = [f+b for f,b in zip(forward, backward)] elif self.mtag_encoding_composition_type == "bi_mlp": forward = [] backward = [] for idx, encoding in enumerate(morphtag_encodings): if idx != 0: f = self.mtag_encoding_f_w * concatenate([encoding, morphtag_encodings[idx-1]]) \ + self.mtag_encoding_f_b forward.append(f) else: forward.append(encoding) if idx != len(morphtag_encodings) - 1: b = self.mtag_encoding_b_w * concatenate([encoding, morphtag_encodings[idx+1]]) \ + self.mtag_encoding_b_b backward.append(b) else: backward.append(encoding) upper_morphtag_encodings = [f+b for f,b in zip(forward, backward)] else: upper_morphtag_encodings = morphtag_encodings for entry, mtag in zip(conll_sentence, upper_morphtag_encodings): entry.vec = concatenate([entry.vec, dynet.dropout(mtag, 0.33)]) for idx, entry in enumerate(conll_sentence): entry.pos_lstms = [entry.vec, entry.vec] entry.headfov = None entry.modfov = None entry.rheadfov = None entry.rmodfov = None #POS tagging loss lstm_forward = self.pos_builders[0].initial_state() lstm_backward = self.pos_builders[1].initial_state() for entry, rentry in zip(conll_sentence, reversed(conll_sentence)): lstm_forward = lstm_forward.add_input(entry.vec) lstm_backward = lstm_backward.add_input(rentry.vec) entry.pos_lstms[1] = lstm_forward.output() rentry.pos_lstms[0] = lstm_backward.output() for entry in conll_sentence: entry.pos_vec = concatenate(entry.pos_lstms) blstm_forward = 
self.pos_bbuilders[0].initial_state() blstm_backward = self.pos_bbuilders[1].initial_state() for entry, rentry in zip(conll_sentence, reversed(conll_sentence)): blstm_forward = blstm_forward.add_input(entry.pos_vec) blstm_backward = blstm_backward.add_input(rentry.pos_vec) entry.pos_lstms[1] = blstm_forward.output() rentry.pos_lstms[0] = blstm_backward.output() concat_layer = [dynet.dropout(concatenate(entry.pos_lstms), 0.33) for entry in conll_sentence] outputFFlayer = self.ffSeqPredictor.predict_sequence(concat_layer) posIDs = [self.pos.get(entry.pos) for entry in conll_sentence] for pred, gold in zip(outputFFlayer, posIDs): posErrs.append(self.pick_neg_log(pred, gold)) # Add predicted pos tags for entry, poses in zip(conll_sentence, outputFFlayer): entry.vec = concatenate([entry.vec, dynet.dropout(self.plookup[np.argmax(poses.value())], 0.33)]) entry.lstms = [entry.vec, entry.vec] #Parsing losses if self.blstmFlag: lstm_forward = self.builders[0].initial_state() lstm_backward = self.builders[1].initial_state() for entry, rentry in zip(conll_sentence, reversed(conll_sentence)): lstm_forward = lstm_forward.add_input(entry.vec) lstm_backward = lstm_backward.add_input(rentry.vec) entry.lstms[1] = lstm_forward.output() rentry.lstms[0] = lstm_backward.output() if self.bibiFlag: for entry in conll_sentence: entry.vec = concatenate(entry.lstms) blstm_forward = self.bbuilders[0].initial_state() blstm_backward = self.bbuilders[1].initial_state() for entry, rentry in zip(conll_sentence, reversed(conll_sentence)): blstm_forward = blstm_forward.add_input(entry.vec) blstm_backward = blstm_backward.add_input(rentry.vec) entry.lstms[1] = blstm_forward.output() rentry.lstms[0] = blstm_backward.output() scores, exprs = self.__evaluate(conll_sentence) gold = [entry.parent_id for entry in conll_sentence] heads = decoder.parse_proj(scores, gold if self.costaugFlag else None) if self.labelsFlag: concat_layer = [dynet.dropout(self.__getRelVector(conll_sentence, head, modifier + 1), 0.33) for modifier, head in enumerate(gold[1:])] outputFFlayer = self.ffRelPredictor.predict_sequence(concat_layer) relIDs = [self.rels[conll_sentence[modifier + 1].relation] for modifier, _ in enumerate(gold[1:])] for pred, goldid in zip(outputFFlayer, relIDs): lerrs.append(self.pick_neg_log(pred, goldid)) e = sum([1 for h, g in zip(heads[1:], gold[1:]) if h != g]) eerrors += e if e > 0: loss = [(exprs[h][i] - exprs[g][i]) for i, (h, g) in enumerate(zip(heads, gold)) if h != g] # * (1.0/float(e)) eloss += (e) mloss += (e) errs.extend(loss) etotal += len(conll_sentence) if iSentence % 1 == 0: if len(errs) > 0 or len(lerrs) > 0 or len(posErrs) > 0 or len(segErrs) > 0 or len(mTagErrs) > 0: eerrs = (esum(errs + lerrs + posErrs + segErrs + mTagErrs)) eerrs.scalar_value() eerrs.backward() self.trainer.update() errs = [] lerrs = [] posErrs = [] segErrs = [] mTagErrs = [] renew_cg() print("Loss: %.4f" % (mloss / iSentence))
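# NOTE: Train above accumulates several loss streams (arc errs, label lerrs,
# posErrs, segErrs, mTagErrs) into one computation graph and makes a single
# Adam update per sentence. A minimal sketch of that accumulate/backward/renew
# pattern (the parameter and the two losses below are toy stand-ins):
import dynet as dy

pc = dy.ParameterCollection()
trainer = dy.AdamTrainer(pc)
p = pc.add_parameters((3,))

x = dy.parameter(p)
losses = [dy.squared_norm(x),                    # stands in for the arc margin terms
          -dy.log(dy.pick(dy.softmax(x), 0))]    # stands in for a pick_neg_log term
total = dy.esum(losses)
total.scalar_value()   # run the forward pass
total.backward()       # one backward pass through the joint objective
trainer.update()
dy.renew_cg()          # fresh computation graph for the next sentence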
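# NOTE: rel_U, rel_W and rel_b in the class below have the shapes of a
# biaffine relation scorer: score(r) = head^T U_r mod + W_r [head; mod] + b_r,
# with the R per-relation matrices U_r packed into one (R*H, H) parameter.
# A minimal sketch of how such parameters are typically combined (toy sizes,
# illustrative names; the class's own scoring code is not shown in this excerpt):
import dynet as dy

pc = dy.ParameterCollection()
H, R = 4, 3  # hidden size and number of relation labels
rel_U = dy.parameter(pc.add_parameters((R * H, H)))
rel_W = dy.parameter(pc.add_parameters((R, 2 * H)))
rel_b = dy.parameter(pc.add_parameters((R,)))

head = dy.inputVector([0.1] * H)
mod = dy.inputVector([0.2] * H)
lin = rel_U * mod                                      # (R*H,): all U_r * mod, stacked
bilin = dy.transpose(dy.reshape(lin, (H, R))) * head   # (R,): head^T U_r mod per relation
scores = bilin + rel_W * dy.concatenate([head, mod]) + rel_b  # one score per relation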
class jNeRE:
    def __init__(self, vocab, ner, rels, w2i, c2i, options):
        self.model = ParameterCollection()
        random.seed(1)
        self.trainer = AdamTrainer(self.model)
        if options.learning_rate is not None:
            self.trainer = AdamTrainer(self.model,
                                       alpha=options.learning_rate)
            print("Adam initial learning rate: %s" % options.learning_rate)
        self.activations = {
            'tanh': tanh,
            'sigmoid': logistic,
            'relu': rectify,
            'tanh3': (lambda x: tanh(cwise_multiply(cwise_multiply(x, x), x)))
        }
        self.activation = self.activations[options.activation]
        self.blstmFlag = options.blstmFlag
        self.labelsFlag = options.labelsFlag
        self.costaugFlag = options.costaugFlag
        self.bibiFlag = options.bibiFlag
        self.ldims = options.lstm_dims
        self.wdims = options.wembedding_dims
        self.cdims = options.cembedding_dims
        self.layers = options.lstm_layers
        self.wordsCount = vocab
        self.vocab = {word: ind + 3 for word, ind in w2i.iteritems()}
        self.ner = {word: ind for ind, word in enumerate(ner)}
        self.id2ner = {ind: word for ind, word in enumerate(ner)}
        self.c2i = c2i
        self.rels = {word: ind for ind, word in enumerate(rels)}
        self.id2rels = rels
        self.nerdims = options.nembedding_dims
        self.mixture_weight = options.mixture_weight
        self.vocab['*PAD*'] = 1
        self.vocab['*INITIAL*'] = 2
        self.wlookup = self.model.add_lookup_parameters(
            (len(vocab) + 3, self.wdims))
        self.clookup = self.model.add_lookup_parameters((len(c2i), self.cdims))
        self.nerlookup = self.model.add_lookup_parameters(
            (len(ner), self.nerdims))
        if options.external_embedding is not None:
            ext_embeddings, ext_emb_dim = load_embeddings_file(
                options.external_embedding, lower=True)
            assert (ext_emb_dim == self.wdims)
            print("Initializing word embeddings by pre-trained vectors")
            count = 0
            for word in self.vocab:
                _word = unicode(word, "utf-8")
                if _word in ext_embeddings:
                    count += 1
                    self.wlookup.init_row(self.vocab[word],
                                          ext_embeddings[_word])
            print("Vocab size: %d; #words having pretrained vectors: %d" %
                  (len(self.vocab), count))
        # BiLSTM stacks for NER tagging over word + char representations.
        self.ner_builders = [
            VanillaLSTMBuilder(1, self.wdims + self.cdims * 2, self.ldims,
                               self.model),
            VanillaLSTMBuilder(1, self.wdims + self.cdims * 2, self.ldims,
                               self.model)
        ]
        self.ner_bbuilders = [
            VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model),
            VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model)
        ]
        # Relation BiLSTMs additionally consume the NER-tag embedding.
        if self.bibiFlag:
            self.builders = [
                VanillaLSTMBuilder(1,
                                   self.wdims + self.cdims * 2 + self.nerdims,
                                   self.ldims, self.model),
                VanillaLSTMBuilder(1,
                                   self.wdims + self.cdims * 2 + self.nerdims,
                                   self.ldims, self.model)
            ]
            self.bbuilders = [
                VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model),
                VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model)
            ]
        elif self.layers > 0:
            self.builders = [
                VanillaLSTMBuilder(self.layers,
                                   self.wdims + self.cdims * 2 + self.nerdims,
                                   self.ldims, self.model),
                VanillaLSTMBuilder(self.layers,
                                   self.wdims + self.cdims * 2 + self.nerdims,
                                   self.ldims, self.model)
            ]
        else:
            self.builders = [
                SimpleRNNBuilder(1,
                                 self.wdims + self.cdims * 2 + self.nerdims,
                                 self.ldims, self.model),
                SimpleRNNBuilder(1,
                                 self.wdims + self.cdims * 2 + self.nerdims,
                                 self.ldims, self.model)
            ]
        self.hidden_units = options.hidden_units
        self.char_rnn = RNNSequencePredictor(
            LSTMBuilder(1, self.cdims, self.cdims, self.model))
        self.crf_module = CRF(self.model, self.id2ner)
        self.tanh_layer_W = self.model.add_parameters(
            (self.hidden_units, 2 * self.ldims))
        self.tanh_layer_b = self.model.add_parameters((self.hidden_units))
        self.last_layer_W = self.model.add_parameters(
            (len(self.ner), self.hidden_units))
        self.last_layer_b = self.model.add_parameters((len(self.ner)))
        # Head and dependent MLPs share one orthonormal initialization.
        W = orthonormal_initializer(self.hidden_units, 2 * self.ldims)
        self.head_layer_W = self.model.parameters_from_numpy(W)
        self.head_layer_b = self.model.add_parameters(
            (self.hidden_units, ), init=dynet.ConstInitializer(0.))
        self.dep_layer_W = self.model.parameters_from_numpy(W)
        self.dep_layer_b = self.model.add_parameters(
            (self.hidden_units, ), init=dynet.ConstInitializer(0.))
        # Biaffine relation scorer: bilinear term (rel_U) plus a linear term
        # (rel_W) over the concatenated head/dependent representations.
        self.rel_U = self.model.add_parameters(
            (len(self.rels) * self.hidden_units, self.hidden_units),
            init=dynet.ConstInitializer(0.))
        self.rel_W = self.model.parameters_from_numpy(
            orthonormal_initializer(len(self.rels), 2 * self.hidden_units))
        #self.rel_W = self.model.add_parameters((len(self.rels), self.hidden_units * 2))
        self.rel_b = self.model.add_parameters((len(self.rels), ),
                                               init=dynet.ConstInitializer(0.))

    def pick_neg_log(self, pred, gold):
        return -dynet.log(dynet.pick(pred, gold))

    def Save(self, filename):
        self.model.save(filename)

    def Load(self, filename):
        self.model.populate(filename)

    def Predict(self, test_data):
        outputPredNER = {}
        id2arg2rel = {}
        outputPredRel = {}
        for sentenceID in test_data:
            sentence = test_data[sentenceID]
            # Word representation: word vector + forward/backward char RNN
            # end states.
            for entry in sentence:
                wordvec = self.wlookup[int(self.vocab.get(
                    entry.norm, 0))] if self.wdims > 0 else None
                last_state = self.char_rnn.predict_sequence(
                    [self.clookup[c] for c in entry.idChars])[-1]
                rev_last_state = self.char_rnn.predict_sequence(
                    [self.clookup[c] for c in reversed(entry.idChars)])[-1]
                entry.vec = concatenate(
                    filter(None, [wordvec, last_state, rev_last_state]))
                entry.ner_lstms = [entry.vec, entry.vec]
                entry.headfov = None
                entry.modfov = None
                entry.rheadfov = None
                entry.rmodfov = None

            # Predicted ner tags
            lstm_forward = self.ner_builders[0].initial_state()
            lstm_backward = self.ner_builders[1].initial_state()
            for entry, rentry in zip(sentence, reversed(sentence)):
                lstm_forward = lstm_forward.add_input(entry.vec)
                lstm_backward = lstm_backward.add_input(rentry.vec)
                entry.ner_lstms[1] = lstm_forward.output()
                rentry.ner_lstms[0] = lstm_backward.output()
            for entry in sentence:
                entry.ner_vec = concatenate(entry.ner_lstms)
            blstm_forward = self.ner_bbuilders[0].initial_state()
            blstm_backward = self.ner_bbuilders[1].initial_state()
            for entry, rentry in zip(sentence, reversed(sentence)):
                blstm_forward = blstm_forward.add_input(entry.ner_vec)
                blstm_backward = blstm_backward.add_input(rentry.ner_vec)
                entry.ner_lstms[1] = blstm_forward.output()
                rentry.ner_lstms[0] = blstm_backward.output()
            concat_layer = [concatenate(entry.ner_lstms) for entry in sentence]
            context_representations = [
                dynet.tanh(
                    dynet.affine_transform([
                        self.tanh_layer_b.expr(),
                        self.tanh_layer_W.expr(), context
                    ])) for context in concat_layer
            ]
            tag_scores = [
                dynet.affine_transform([
                    self.last_layer_b.expr(),
                    self.last_layer_W.expr(), context
                ]) for context in context_representations
            ]
            # Pad each score vector with two dummy entries for the CRF's
            # start/stop tags.
            observations = [
                dynet.concatenate([obs, dynet.inputVector([-1e10, -1e10])],
                                  d=0) for obs in tag_scores
            ]
            predicted_ner_indices, _ = self.crf_module.viterbi_decoding(
                observations)
            predicted_nertags = [
                self.id2ner[idx] for idx in predicted_ner_indices
            ]
            outputPredNER[sentenceID] = predicted_nertags

            # Add ner embeddings
            for entry, ner in zip(sentence, predicted_ner_indices):
                entry.vec = concatenate([entry.vec, self.nerlookup[ner]])
                entry.lstms = [entry.vec, entry.vec]

            # Relation scoring
            if self.blstmFlag:
                lstm_forward = self.builders[0].initial_state()
                lstm_backward = self.builders[1].initial_state()
                for entry, rentry in zip(sentence, reversed(sentence)):
                    lstm_forward = lstm_forward.add_input(entry.vec)
                    lstm_backward = lstm_backward.add_input(rentry.vec)
                    entry.lstms[1] = lstm_forward.output()
                    rentry.lstms[0] = lstm_backward.output()
                if self.bibiFlag:
                    for entry in sentence:
                        entry.vec = concatenate(entry.lstms)
                    blstm_forward = self.bbuilders[0].initial_state()
                    blstm_backward = self.bbuilders[1].initial_state()
                    for entry, rentry in zip(sentence, reversed(sentence)):
                        blstm_forward = blstm_forward.add_input(entry.vec)
                        blstm_backward = blstm_backward.add_input(rentry.vec)
                        entry.lstms[1] = blstm_forward.output()
                        rentry.lstms[0] = blstm_backward.output()
            concat_layer = [concatenate(entry.lstms) for entry in sentence]
            head_context_representations = [
                dynet.tanh(
                    dynet.affine_transform([
                        self.head_layer_b.expr(),
                        self.head_layer_W.expr(), context
                    ])) for context in concat_layer
            ]
            dep_context_representations = [
                dynet.tanh(
                    dynet.affine_transform([
                        self.dep_layer_b.expr(),
                        self.dep_layer_W.expr(), context
                    ])) for context in concat_layer
            ]
            # Candidate argument positions: tokens tagged as the last (L-) or
            # only (U-) token of a non-"Other" entity.
            nerids = []
            for ind, tag in enumerate(predicted_nertags):
                if str(tag).startswith("L-") or str(tag).startswith("U-"):
                    if not str(tag).endswith("-Other"):
                        nerids.append(ind)
            # Enumerate candidate pairs whose entity types can participate in
            # a relation; "NEG" is the placeholder for "no relation".
            id2arg2rel[sentenceID] = {}
            for ind1 in nerids:
                for ind2 in nerids:
                    if ind1 != ind2:
                        if predicted_nertags[ind1] in [
                                "L-Peop", "U-Peop"
                        ] and predicted_nertags[ind2] in [
                                "L-Peop", "U-Peop", "L-Org", "U-Org", "L-Loc",
                                "U-Loc"
                        ]:
                            id2arg2rel[sentenceID][(ind1, ind2)] = "NEG"
                        if predicted_nertags[ind1] in [
                                "L-Loc", "U-Loc", "L-Org", "U-Org"
                        ] and predicted_nertags[ind2] in ["L-Loc", "U-Loc"]:
                            id2arg2rel[sentenceID][(ind1, ind2)] = "NEG"
            for (head, dep) in id2arg2rel[sentenceID]:
                linear = self.rel_U.expr() * dep_context_representations[dep]
                linear = dynet.reshape(linear,
                                       (self.hidden_units, len(self.rels)))
                bilinear = dynet.transpose(
                    head_context_representations[head]) * linear
                biaffine = dynet.transpose(bilinear) + \
                    self.rel_W.expr() * concatenate([
                        head_context_representations[head],
                        dep_context_representations[dep]
                    ]) + self.rel_b.expr()
                id2arg2rel[sentenceID][(head, dep)] = self.id2rels[np.argmax(
                    softmax(biaffine).value())]
            # Keep only pairs predicted to stand in a real relation.
            outputPredRel[sentenceID] = {}
            for (head, dep) in id2arg2rel[sentenceID]:
                rel = id2arg2rel[sentenceID][(head, dep)]
                if rel != "NEG":
                    outputPredRel[sentenceID][(head, dep)] = rel
            renew_cg()
        return outputPredNER, outputPredRel

    def Train(self, train_data, train_id2nerBILOU, id2arg2rel, isTrain=True):
        eloss = 0.0
        mloss = 0.0
        eerrors = 0
        etotal = 0
        start = time.time()
        nwtotal = 0
        if isTrain:
            shuffledData = list(train_data.keys())
            random.shuffle(shuffledData)
            lerrs = []
            nerErrs = []
            for iSentence, sentenceId in enumerate(shuffledData):
                if iSentence % 100 == 0 and iSentence != 0:
                    print(
                        "Processing sentence number: %d, Loss: %.4f, Time: %.2f"
                        % (iSentence, eloss / etotal, time.time() - start))
                    start = time.time()
                    eerrors = 0
                    eloss = 0.0
                    etotal = 0
                sentence = train_data[sentenceId]
                goldNers = train_id2nerBILOU[sentenceId].strip().split()
                for entry in sentence:
                    # Word dropout: keep a word with probability c/(0.25+c),
                    # where c is its training-corpus frequency.
                    c = float(self.wordsCount.get(entry.norm, 0))
                    dropFlag = (random.random() < (c / (0.25 + c)))
                    wordvec = self.wlookup[
                        int(self.vocab.get(entry.norm, 0))
                        if dropFlag else 0] if self.wdims > 0 else None
                    last_state = self.char_rnn.predict_sequence(
                        [self.clookup[ch] for ch in entry.idChars])[-1]
                    rev_last_state = self.char_rnn.predict_sequence(
                        [self.clookup[ch]
                         for ch in reversed(entry.idChars)])[-1]
                    entry.vec = dynet.dropout(
                        concatenate(
                            filter(None,
                                   [wordvec, last_state, rev_last_state])),
                        0.33)
                    entry.ner_lstms = [entry.vec, entry.vec]
                    entry.headfov = None
                    entry.modfov = None
                    entry.rheadfov = None
                    entry.rmodfov = None

                # ner tagging loss
                lstm_forward = self.ner_builders[0].initial_state()
                lstm_backward = self.ner_builders[1].initial_state()
                for entry, rentry in zip(sentence, reversed(sentence)):
                    lstm_forward = lstm_forward.add_input(entry.vec)
                    lstm_backward = lstm_backward.add_input(rentry.vec)
                    entry.ner_lstms[1] = lstm_forward.output()
                    rentry.ner_lstms[0] = lstm_backward.output()
                for entry in sentence:
                    entry.ner_vec = concatenate(entry.ner_lstms)
                blstm_forward = self.ner_bbuilders[0].initial_state()
                blstm_backward = self.ner_bbuilders[1].initial_state()
                for entry, rentry in zip(sentence, reversed(sentence)):
                    blstm_forward = blstm_forward.add_input(entry.ner_vec)
                    blstm_backward = blstm_backward.add_input(rentry.ner_vec)
                    entry.ner_lstms[1] = blstm_forward.output()
                    rentry.ner_lstms[0] = blstm_backward.output()
                concat_layer = [
                    dynet.dropout(concatenate(entry.ner_lstms), 0.33)
                    for entry in sentence
                ]
                context_representations = [
                    dynet.tanh(
                        dynet.affine_transform([
                            self.tanh_layer_b.expr(),
                            self.tanh_layer_W.expr(), context
                        ])) for context in concat_layer
                ]
                tag_scores = [
                    dynet.affine_transform([
                        self.last_layer_b.expr(),
                        self.last_layer_W.expr(), context
                    ]) for context in context_representations
                ]
                nerIDs = [self.ner.get(tag) for tag in goldNers]
                loss = self.crf_module.neg_log_loss(tag_scores, nerIDs)
                # loss, _ = self.crf_module.viterbi_loss(tag_scores, nerIDs)
                nerErrs.append(loss)

                # Add ner embeddings (gold tags are used at training time)
                for entry, ner in zip(sentence, nerIDs):
                    entry.vec = concatenate(
                        [entry.vec,
                         dynet.dropout(self.nerlookup[ner], 0.33)])
                    entry.lstms = [entry.vec, entry.vec]

                # Relation losses
                if self.blstmFlag:
                    lstm_forward = self.builders[0].initial_state()
                    lstm_backward = self.builders[1].initial_state()
                    for entry, rentry in zip(sentence, reversed(sentence)):
                        lstm_forward = lstm_forward.add_input(entry.vec)
                        lstm_backward = lstm_backward.add_input(rentry.vec)
                        entry.lstms[1] = lstm_forward.output()
                        rentry.lstms[0] = lstm_backward.output()
                    if self.bibiFlag:
                        for entry in sentence:
                            entry.vec = concatenate(entry.lstms)
                        blstm_forward = self.bbuilders[0].initial_state()
                        blstm_backward = self.bbuilders[1].initial_state()
                        for entry, rentry in zip(sentence,
                                                 reversed(sentence)):
                            blstm_forward = blstm_forward.add_input(entry.vec)
                            blstm_backward = blstm_backward.add_input(
                                rentry.vec)
                            entry.lstms[1] = blstm_forward.output()
                            rentry.lstms[0] = blstm_backward.output()
                concat_layer = [
                    dynet.dropout(concatenate(entry.lstms), 0.33)
                    for entry in sentence
                ]
                head_context_representations = [
                    dynet.tanh(
                        dynet.affine_transform([
                            self.head_layer_b.expr(),
                            self.head_layer_W.expr(), context
                        ])) for context in concat_layer
                ]
                dep_context_representations = [
                    dynet.tanh(
                        dynet.affine_transform([
                            self.dep_layer_b.expr(),
                            self.dep_layer_W.expr(), context
                        ])) for context in concat_layer
                ]
                pairrels = id2arg2rel[sentenceId]
                for (head, dep) in pairrels:
                    linear = self.rel_U.expr() * \
                        dep_context_representations[dep]
                    linear = dynet.reshape(
                        linear, (self.hidden_units, len(self.rels)))
                    bilinear = dynet.transpose(
                        head_context_representations[head]) * linear
                    biaffine = dynet.transpose(bilinear) + \
                        self.rel_W.expr() * concatenate([
                            head_context_representations[head],
                            dep_context_representations[dep]
                        ]) + self.rel_b.expr()
                    lerrs.append(
                        self.pick_neg_log(
                            softmax(biaffine),
                            self.rels.get(pairrels[(head, dep)])))
                etotal += len(sentence)
                nwtotal += len(sentence)
                # Parameters are updated after every sentence.
                if len(lerrs) > 0 or len(nerErrs) > 0:
                    eerrs = esum(nerErrs + lerrs)
                    eloss += eerrs.scalar_value()
                    mloss += eloss
                    eerrs.backward()
                    self.trainer.update()
                lerrs = []
                nerErrs = []
                renew_cg()
            print("Loss: %.4f" % (mloss / nwtotal))
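# Usage sketch for jNeRE (illustrative only; the option names mirror the
# constructor above, and the vocabularies and data dictionaries come from
# this project's preprocessing, which is assumed here rather than shown):
#
#   model = jNeRE(wordsCount, ner_tags, rel_labels, w2i, c2i, options)
#   for epoch in xrange(n_epochs):
#       model.Train(train_data, train_id2nerBILOU, train_id2arg2rel)
#       pred_ner, pred_rel = model.Predict(dev_data)
#   model.Save("jnere.model")  # hypothetical file name
#
# Predict returns one BILOU tag sequence per sentence plus, per sentence, a
# dict mapping (head, dep) entity-end indices to predicted relation labels,
# with "NEG" (no-relation) pairs filtered out.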
class Learner:
    def __init__(self, c2i, options):
        self.model = dy.ParameterCollection()
        random.seed(1)
        self.trainer = dy.AdamTrainer(self.model)
        self.dropout_rate = options.dropout_rate
        self.ldims = options.lstm_dims
        self.cdims = options.cembedding_dims
        self.c2i = c2i
        self.W_d = self.model.add_parameters((self.ldims, 2 * self.ldims))
        self.W_db = self.model.add_parameters(self.ldims)
        self.clookup = self.model.add_lookup_parameters((len(c2i), self.cdims))
        # Character-level word encoder.
        self.word_encoder = RNNSequencePredictor(
            dy.VanillaLSTMBuilder(1, self.cdims, self.ldims, self.model))
        # Sentence-level BiLSTM over word encodings.
        self.context_encoder = [
            dy.VanillaLSTMBuilder(1, self.ldims, self.ldims, self.model),
            dy.VanillaLSTMBuilder(1, self.ldims, self.ldims, self.model)
        ]
        self.output_encoder = dy.VanillaLSTMBuilder(1, self.cdims, self.ldims,
                                                    self.model)
        self.decoder = dy.VanillaLSTMBuilder(2, self.cdims, self.ldims,
                                             self.model)
        self.W_s = self.model.add_parameters((len(self.c2i), self.ldims))
        self.W_sb = self.model.add_parameters((len(self.c2i)))

    def save(self, filename):
        self.model.save(filename)

    def load(self, filename):
        self.model.populate(filename)

    def predict(self, conll_path):
        with open(conll_path, 'r') as conllFP:
            for iSentence, sentence in enumerate(
                    read_conll(conllFP, self.c2i)):
                dy.renew_cg()
                conll_sentence = [
                    entry for entry in sentence
                    if isinstance(entry, utils.ConllEntry)
                ]

                # I- Word encoding
                for entry in conll_sentence:
                    c_embeddings = []
                    for c in entry.idChars:
                        # TODO : try different formulas like alpha/(alpha + #(w))
                        dropFlag = False  # random.random() < self.dropout_rate
                        c_embedding = self.clookup[c if not dropFlag else 0]
                        c_embeddings.append(c_embedding)
                    e_i = self.word_encoder.predict_sequence(c_embeddings)[-1]
                    entry.word_enc = dy.dropout(e_i, self.dropout_rate)
                    entry.context_lstms = [entry.word_enc, entry.word_enc]

                # II- Context encoding
                blstm_forward = self.context_encoder[0].initial_state()
                blstm_backward = self.context_encoder[1].initial_state()
                for entry, rentry in zip(conll_sentence,
                                         reversed(conll_sentence)):
                    blstm_forward = blstm_forward.add_input(entry.word_enc)
                    blstm_backward = blstm_backward.add_input(rentry.word_enc)
                    entry.context_lstms[1] = blstm_forward.output()
                    rentry.context_lstms[0] = blstm_backward.output()
                for entry in conll_sentence:
                    entry.context_enc = dy.concatenate(entry.context_lstms)

                # III- Output encoding is skipped at prediction time: the
                # gold output sequence is not available here.

                # IV- Decoder
                # Init for Context encoding
                for entry in conll_sentence:
                    entry.context_enc = dy.rectify(
                        self.W_d.expr() * entry.context_enc + self.W_db.expr())
                output_state = self.output_encoder.initial_state()
                for entry in conll_sentence:
                    # output_state receives no inputs at prediction time, so
                    # entry.comb falls back to the word encoding alone.
                    if output_state.output():
                        entry.comb = entry.word_enc + output_state.output()
                    else:
                        entry.comb = entry.word_enc
                    decoder_state = self.decoder.initial_state().set_s([
                        entry.context_enc,
                        dy.tanh(entry.context_enc), entry.comb,
                        dy.tanh(entry.comb)
                    ])
                    # Greedy decoding, capped at 50 output symbols; add_input
                    # returns the updated decoder state.
                    predicted_sequence = []
                    predicted_char = self.c2i["<s>"]
                    counter = 0
                    while True:
                        counter += 1
                        decoder_state = decoder_state.add_input(
                            self.clookup[predicted_char])
                        probs = self._get_probs(decoder_state.output())
                        predicted_char = probs.npvalue().argmax()
                        if predicted_char != self.c2i["</s>"] and counter < 50:
                            predicted_sequence.append(predicted_char)
                        else:
                            break
                    # Feed the predicted symbols back in; the resulting state
                    # is not used further.
                    for seq_i in predicted_sequence:
                        tag_embedding = self.clookup[seq_i]
                        decoder_state = decoder_state.add_input(tag_embedding)
                    entry.predicted_sequence = predicted_sequence
                yield conll_sentence

    def _get_probs(self, rnn_output):
        output_w = dy.parameter(self.W_s)
        output_b = dy.parameter(self.W_sb)
        return dy.softmax(output_w * rnn_output + output_b)

    def train(self, conll_path):
        total = 0.0
        with open(conll_path, 'r') as conllFP:
            shuffledData = list(read_conll(conllFP, self.c2i))
            random.shuffle(shuffledData)
            for iSentence, sentence in enumerate(shuffledData):
                conll_sentence = [
                    entry for entry in sentence
                    if isinstance(entry, utils.ConllEntry)
                ]

                # I- Word encoding
                for entry in conll_sentence:
                    c_embeddings = []
                    for c in entry.idChars:
                        # TODO : try different formulas like alpha/(alpha + #(w))
                        dropFlag = False  # random.random() < self.dropout_rate
                        c_embedding = self.clookup[c if not dropFlag else 0]
                        c_embeddings.append(c_embedding)
                    e_i = self.word_encoder.predict_sequence(c_embeddings)[-1]
                    entry.word_enc = dy.dropout(e_i, self.dropout_rate)
                    entry.context_lstms = [entry.word_enc, entry.word_enc]

                # II- Context encoding
                blstm_forward = self.context_encoder[0].initial_state()
                blstm_backward = self.context_encoder[1].initial_state()
                for entry, rentry in zip(conll_sentence,
                                         reversed(conll_sentence)):
                    blstm_forward = blstm_forward.add_input(entry.word_enc)
                    blstm_backward = blstm_backward.add_input(rentry.word_enc)
                    entry.context_lstms[1] = blstm_forward.output()
                    rentry.context_lstms[0] = blstm_backward.output()
                for entry in conll_sentence:
                    entry.context_enc = dy.concatenate(entry.context_lstms)

                # III- Output encoding
                c_embeddings = []
                for entry in conll_sentence:
                    for f in entry.idFeats:
                        # TODO : try different formulas like alpha/(alpha + #(w))
                        dropFlag = False  # random.random() < self.dropout_rate
                        c_embedding = self.clookup[f if not dropFlag else 0]
                        c_embeddings.append(c_embedding)
                output_encoder = RNNSequencePredictor(self.output_encoder)
                t_i = output_encoder.predict_sequence(c_embeddings)
                for entry, vec in zip(conll_sentence, t_i):
                    entry.output_enc = dy.dropout(vec, self.dropout_rate)

                # IV- Decoder
                # Init for Context encoding
                for entry in conll_sentence:
                    entry.context_enc = dy.rectify(
                        self.W_d.expr() * entry.context_enc + self.W_db.expr())
                # Init for Word and Output encoding
                for entry in conll_sentence:
                    entry.comb = entry.word_enc + entry.output_enc

                losses = []
                for entry in conll_sentence:
                    decoder_state = self.decoder.initial_state().set_s([
                        entry.context_enc,
                        dy.tanh(entry.context_enc), entry.comb,
                        dy.tanh(entry.comb)
                    ])
                    # Teacher forcing: feed the gold symbols and score each
                    # step; probs is reset per entry so it stays aligned with
                    # entry.decoder_input.
                    probs = []
                    for g_c in entry.decoder_input:
                        decoder_state = decoder_state.add_input(
                            self.clookup[g_c])
                        p = self._get_probs(decoder_state.output())
                        probs.append(p)
                    losses += [
                        -dy.log(dy.pick(p, o))
                        for p, o in zip(probs, entry.decoder_input)
                    ]

                total_losses = dy.esum(losses)
                cur_loss = total_losses.scalar_value()
                total += cur_loss
                total_losses.backward()
                self.trainer.update()
                if iSentence != 0 and iSentence % 500 == 0:
                    print("Sentence:" + str(iSentence) + " Loss:" +
                          str(total / (iSentence + 1)))
                dy.renew_cg()
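# A minimal smoke test for the Learner class above. Everything here is an
# illustrative sketch: the option values and file name are made up, and only
# construction and (de)serialization are exercised, not actual training data.


def _learner_smoke_test():
    class _Opts(object):
        dropout_rate = 0.33
        lstm_dims = 128
        cembedding_dims = 50

    # Tiny character inventory including the decoder's sentinel symbols.
    c2i = {c: i for i, c in enumerate(["<s>", "</s>", "a", "b", "c"])}
    learner = Learner(c2i, _Opts())
    learner.save("char_learner.model")  # hypothetical path
    learner.load("char_learner.model")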