def Predict(self, conll_path):
    with open(conll_path, 'r') as conllFP:
        for iSentence, sentence in enumerate(read_conll(conllFP, False)):
            self.Init()
            forest = ParseForest(sentence)
            self.getWordEmbeddings(forest, False)

            for root in forest.roots:
                root.lstms = [self.builders[0].initial_state().add_input(root.vec),
                              self.builders[1].initial_state().add_input(root.vec)]

            while len(forest.roots) > 1:
                self.__evaluate(forest, False)
                bestParent, bestChild, bestScore = None, None, float("-inf")
                bestIndex, bestOp = None, None
                roots = forest.roots

                for i in xrange(len(forest.roots) - 1):
                    for irel, rel in enumerate(self.irels):
                        for op in xrange(2):
                            if bestScore < roots[i].scores[irel][op] and (i + (1 - op)) > 0:
                                bestParent, bestChild = i + op, i + (1 - op)
                                bestScore = roots[i].scores[irel][op]
                                bestIndex, bestOp = i, op
                                bestRelation, bestIRelation = rel, irel

                for j in xrange(max(0, bestIndex - self.k - 1), min(len(forest.roots), bestIndex + self.k + 2)):
                    roots[j].scores = None

                roots[bestChild].pred_parent_id = forest.roots[bestParent].id
                roots[bestChild].pred_relation = bestRelation

                roots[bestParent].lstms[bestOp] = roots[bestParent].lstms[bestOp].add_input(
                    self.activation(self.lstm2lstmbias + self.lstm2lstm *
                                    concatenate([roots[bestChild].lstms[0].output(),
                                                 lookup(self.model["rels-lookup"], bestIRelation),
                                                 roots[bestChild].lstms[1].output()])))

                forest.Attach(bestParent, bestChild)

            renew_cg()
            yield sentence
def predict(self, sentences):
    self.getWordEmbeddings(sentences, False)
    for sentence in sentences:
        stack = ParseForest([])
        buf = ParseForest(sentence)

        for root in sentence:
            root.lstms = [root.vec for _ in range(self.nnvecs)]

        hoffset = 1 if self.headFlag else 0

        while not (len(buf) == 1 and len(stack) == 0):
            scores = self.__evaluate(stack, buf, False)
            best = max(chain(*scores), key=itemgetter(2))

            if best[1] == 2:  # SHIFT
                stack.roots.append(buf.roots[0])
                del buf.roots[0]
            elif best[1] == 0:  # LEFT-ARC: the head is the first buffer item
                child = stack.roots.pop()
                parent = buf.roots[0]
                child.pred_parent_id = parent.id
                child.pred_relation = best[0]
                bestOp = 0
                if self.rlMostFlag:
                    parent.lstms[bestOp + hoffset] = child.lstms[bestOp + hoffset]
                if self.rlFlag:
                    parent.lstms[bestOp + hoffset] = child.vec
            elif best[1] == 1:  # RIGHT-ARC: the head is the top of the stack
                child = stack.roots.pop()
                parent = stack.roots[-1]
                child.pred_parent_id = parent.id
                child.pred_relation = best[0]
                bestOp = 1
                if self.rlMostFlag:
                    parent.lstms[bestOp + hoffset] = child.lstms[bestOp + hoffset]
                if self.rlFlag:
                    parent.lstms[bestOp + hoffset] = child.vec
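# A minimal, self-contained sketch (an illustrative assumption, not code from
# this repo) of the transition encoding the snippets above rely on: every
# scored candidate is a (relation, op, score, expr) tuple and op is a fixed
# integer code, so picking a transition is an argmax over the flattened lists
# returned by __evaluate.
from itertools import chain
from operator import itemgetter

LEFT_ARC, RIGHT_ARC, SHIFT, SWAP = 0, 1, 2, 3  # codes inferred from the usage above

def pick_best(scores):
    # scores: one list of (relation, op, score, expr) tuples per transition type
    return max(chain(*scores), key=itemgetter(2))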
def Train(self, trainData, options):
    mloss = 0.0
    eloss = 0.0
    eerrors = 0
    lerrors = 0
    etotal = 0
    ninf = -float('inf')

    ts = time()
    start = ts

    random.shuffle(trainData)  # in certain cases the data will already have been shuffled after being read from file or while creating dev data
    print "Length of training data: ", len(trainData)

    errs = []

    self.feature_extractor.Init(options)

    for iSentence, sentence in enumerate(trainData, 1):
        if iSentence % 100 == 0:
            loss_message = 'Processing sentence number: %d'%iSentence + \
                ' Loss: %.3f'%(eloss / etotal) + \
                ' Errors: %.3f'%((float(eerrors)) / etotal) + \
                ' Labeled Errors: %.3f'%(float(lerrors) / etotal) + \
                ' Time: %.2gs'%(time()-start)
            print loss_message
            start = time()
            eerrors = 0
            eloss = 0.0
            etotal = 0
            lerrors = 0

        sentence = deepcopy(sentence)  # ensures we are working with a clean copy of the sentence and allows memory to be recycled each time round the loop
        conll_sentence = [entry for entry in sentence if isinstance(entry, utils.ConllEntry)]
        conll_sentence = conll_sentence[1:] + [conll_sentence[0]]
        self.feature_extractor.getWordEmbeddings(conll_sentence, True, options)
        stack = ParseForest([])
        buf = ParseForest(conll_sentence)
        hoffset = 1 if self.headFlag else 0

        for root in conll_sentence:
            root.lstms = [root.vec] if self.headFlag else []
            if not self.recursive_composition:
                root.lstms += [self.feature_extractor.paddingVec for _ in range(self.nnvecs - hoffset)]
            else:
                root.lstms += [root.vec]
                root.lstm = None

        while not (len(buf) == 1 and len(stack) == 0):
            scores = self.__evaluate(stack, buf, True)

            #to ensure that we have at least one wrong operation
            scores.append([(None, 4, ninf, None)])

            stack_ids = [sitem.id for sitem in stack.roots]
            s1 = [stack.roots[-2]] if len(stack) > 1 else []
            s0 = [stack.roots[-1]] if len(stack) > 0 else []
            b = [buf.roots[0]] if len(buf) > 0 else []
            beta = buf.roots[1:] if len(buf) > 1 else []

            costs, shift_case = self.calculate_cost(scores, s0, s1, b, beta, stack_ids)

            bestValid = list(s for s in chain(*scores)
                             if costs[s[1]] == 0 and (s[1] == SHIFT or s[1] == SWAP or s[0] == s0[0].relation))
            bestValid = max(bestValid, key=itemgetter(2))

            bestWrong = max((s for s in chain(*scores)
                             if costs[s[1]] != 0 or (s[1] != SHIFT and s[1] != SWAP and s[0] != s0[0].relation)),
                            key=itemgetter(2))

            #force swap
            if costs[SWAP] == 0:
                best = bestValid
            else:
                #select a transition to follow
                # + aggressive exploration
                #1: might want to experiment with that parameter
                if bestWrong[1] == SWAP:
                    best = bestValid
                else:
                    best = bestValid if ((not self.oracle) or
                                         (bestValid[2] - bestWrong[2] > 1.0) or
                                         (bestValid[2] > bestWrong[2] and random.random() > 0.1)) else bestWrong

            if best[1] == LEFT_ARC or best[1] == RIGHT_ARC:
                child = s0[0]

            #updates for the dynamic oracle
            if self.oracle:
                self.oracle_updates(best, b, s0, stack_ids, shift_case)

            self.apply_transition(best, stack, buf, hoffset)

            if bestValid[2] < bestWrong[2] + 1.0:
                loss = bestWrong[3] - bestValid[3]
                mloss += 1.0 + bestWrong[2] - bestValid[2]
                eloss += 1.0 + bestWrong[2] - bestValid[2]
                errs.append(loss)

            #labeled errors
            if best[1] == LEFT_ARC or best[1] == RIGHT_ARC:
                if (child.pred_parent_id != child.parent_id or
                        child.pred_relation != child.relation):
                    lerrors += 1
                    #attachment error
                    if child.pred_parent_id != child.parent_id:
                        eerrors += 1  #??? when did this happen and why?

            if best[1] == 0 or best[1] == 2:
                etotal += 1  #footnote 8 in Eli's original paper

        if len(errs) > 50:  # or True:
            eerrs = dy.esum(errs)
            scalar_loss = eerrs.scalar_value()  #forward
            eerrs.backward()
            self.trainer.update()
            errs = []
            lerrs = []
            dy.renew_cg()
            self.feature_extractor.Init(options)

    if len(errs) > 0:
        eerrs = dy.esum(errs)
        eerrs.scalar_value()
        eerrs.backward()
        self.trainer.update()
        errs = []
        lerrs = []
        dy.renew_cg()

    self.trainer.update()
    print "Loss: ", mloss/iSentence
    print "Total training time: %.2fs"%(time()-ts)
def Predict(self, treebanks, datasplit, options):
    reached_max_swap = 0
    char_map = {}
    if options.char_map_file:
        char_map_fh = codecs.open(options.char_map_file, encoding='utf-8')
        char_map = json.loads(char_map_fh.read())

    # should probably use a namedtuple in get_vocab to make this prettier
    print "Collecting test data vocab"
    _, test_words, test_chars, _, _, _, test_treebanks, test_langs = utils.get_vocab(treebanks, datasplit, char_map)

    # get external embeddings for the set of words and chars in the test vocab but not in the training vocab
    test_embeddings = defaultdict(lambda: {})
    if options.word_emb_size > 0:
        new_test_words = set(test_words) - self.feature_extractor.words.viewkeys()
        print "Number of OOV word types at test time: %i (out of %i)"%(len(new_test_words), len(test_words))
        if len(new_test_words) > 0:
            # no point loading embeddings if there are no words to look for
            for lang in test_langs:
                test_embeddings["words"].update(utils.get_external_embeddings(options, lang, new_test_words))
            if len(test_langs) > 1 and test_embeddings["words"]:
                print "External embeddings found for %i words (out of %i)"%(len(test_embeddings["words"]), len(new_test_words))

    if options.char_emb_size > 0:
        new_test_chars = set(test_chars) - self.feature_extractor.chars.viewkeys()
        print "Number of OOV char types at test time: %i (out of %i)"%(len(new_test_chars), len(test_chars))
        if len(new_test_chars) > 0:
            for lang in test_langs:
                test_embeddings["chars"].update(utils.get_external_embeddings(options, lang, new_test_chars, chars=True))
            if len(test_langs) > 1 and test_embeddings["chars"]:
                print "External embeddings found for %i chars (out of %i)"%(len(test_embeddings["chars"]), len(new_test_chars))

    ts = time()
    data = utils.read_conll_dir(treebanks, datasplit, char_map=char_map)
    for iSentence, osentence in enumerate(data, 1):
        sentence = deepcopy(osentence)
        reached_swap_for_i_sentence = False
        max_swap = 2*len(sentence)
        iSwap = 0
        self.feature_extractor.Init(options)
        conll_sentence = [entry for entry in sentence if isinstance(entry, utils.ConllEntry)]
        conll_sentence = conll_sentence[1:] + [conll_sentence[0]]
        self.feature_extractor.getWordEmbeddings(conll_sentence, False, options, test_embeddings)
        stack = ParseForest([])
        buf = ParseForest(conll_sentence)
        hoffset = 1 if self.headFlag else 0

        for root in conll_sentence:
            root.lstms = [root.vec] if self.headFlag else []
            if not self.recursive_composition:
                root.lstms += [self.feature_extractor.paddingVec for _ in range(self.nnvecs - hoffset)]
            else:
                root.lstms += [root.vec]
                root.lstm = None  #only necessary for the treeLSTM case
                root.composed_rep = root.vec.value()

        while not (len(buf) == 1 and len(stack) == 0):
            scores = self.__evaluate(stack, buf, False)
            best = max(chain(*(scores if iSwap < max_swap else scores[:3])), key=itemgetter(2))
            if iSwap == max_swap and not reached_swap_for_i_sentence:
                reached_max_swap += 1
                reached_swap_for_i_sentence = True
                print "reached max swap in %d out of %d sentences"%(reached_max_swap, iSentence)
            self.apply_transition(best, stack, buf, hoffset)
            if best[1] == SWAP:
                iSwap += 1

        #keep in memory the information we need, not all the vectors
        oconll_sentence = [entry for entry in osentence if isinstance(entry, utils.ConllEntry)]
        oconll_sentence = oconll_sentence[1:] + [oconll_sentence[0]]
        for tok_o, tok in zip(oconll_sentence, conll_sentence):
            tok_o.pred_relation = tok.pred_relation
            tok_o.pred_parent_id = tok.pred_parent_id
            if self.recursive_composition:
                tok_o.composed_rep = tok.composed_rep
        yield osentence

        dy.renew_cg()

    print "Total prediction time: %.2fs"%(time()-ts)
def Train(self, conll_path):
    mloss = 0.0
    errors = 0
    batch = 0
    eloss = 0.0
    eerrors = 0
    lerrors = 0
    etotal = 0
    ltotal = 0
    ninf = -float('inf')

    hoffset = 1 if self.headFlag else 0

    start = time.time()

    with open(conll_path, 'r') as conllFP:
        shuffledData = list(read_conll(conllFP, True))
        random.shuffle(shuffledData)

        errs = []
        eeloss = 0.0

        self.Init()

        for iSentence, sentence in enumerate(shuffledData):
            # print progress information every 100 sentences
            if iSentence % 100 == 0 and iSentence != 0:
                print 'Processing sentence number:', iSentence, 'Loss:', eloss / etotal, 'Errors:', (float(eerrors)) / etotal, 'Labeled Errors:', (float(lerrors) / etotal), 'Time', time.time() - start
                # logger.debug('Sentence %s, Loss: %s, Errors: %s, Labeled Errors: %s, Time: %s', iSentence, eloss / etotal, (float(eerrors)) / etotal, (float(lerrors) / etotal), time.time() - start)
                start = time.time()
                eerrors = 0
                eloss = 0.0
                etotal = 0
                lerrors = 0
                ltotal = 0

            conll_sentence = [entry for entry in sentence if isinstance(entry, utils.ConllEntry)]
            conll_sentence = conll_sentence[1:] + [conll_sentence[0]]
            self.getWordEmbeddings(conll_sentence, True)
            # the stack starts out empty
            stack = ParseForest([])
            # the buffer starts out holding the whole sentence
            buf = ParseForest(conll_sentence)

            for root in conll_sentence:
                # the LSTM input for each word is the concatenation of self.nnvecs vectors
                root.lstms = [root.vec for _ in xrange(self.nnvecs)]

            hoffset = 1 if self.headFlag else 0

            while not (len(buf) == 1 and len(stack) == 0):
                scores = self.__evaluate(stack, buf, True)
                scores.append([(None, 3, ninf, None)])

                # alpha: the remaining stack items below s1
                alpha = stack.roots[:-2] if len(stack) > 2 else []
                # s1: the second item from the top of the stack
                s1 = [stack.roots[-2]] if len(stack) > 1 else []
                # s0: the top of the stack
                s0 = [stack.roots[-1]] if len(stack) > 0 else []
                # b: the first buffer item
                b = [buf.roots[0]] if len(buf) > 0 else []
                # beta: the remaining buffer items
                beta = buf.roots[1:] if len(buf) > 1 else []

                left_cost = (len([h for h in s1 + beta if h.id == s0[0].parent_id]) +
                             len([d for d in b + beta if d.parent_id == s0[0].id])) if len(scores[0]) > 0 else 1
                right_cost = (len([h for h in b + beta if h.id == s0[0].parent_id]) +
                              len([d for d in b + beta if d.parent_id == s0[0].id])) if len(scores[1]) > 0 else 1
                shift_cost = (len([h for h in s1 + alpha if h.id == b[0].parent_id]) +
                              len([d for d in s0 + s1 + alpha if d.parent_id == b[0].id])) if len(scores[2]) > 0 else 1
                costs = (left_cost, right_cost, shift_cost, 1)

                bestValid = max((s for s in chain(*scores)
                                 if costs[s[1]] == 0 and (s[1] == 2 or s[0] == stack.roots[-1].relation)),
                                key=itemgetter(2))
                bestWrong = max((s for s in chain(*scores)
                                 if costs[s[1]] != 0 or (s[1] != 2 and s[0] != stack.roots[-1].relation)),
                                key=itemgetter(2))
                best = bestValid if ((not self.oracle) or
                                     (bestValid[2] - bestWrong[2] > 1.0) or
                                     (bestValid[2] > bestWrong[2] and random.random() > 0.1)) else bestWrong

                if best[1] == 2:
                    # SHIFT: no relation is produced
                    stack.roots.append(buf.roots[0])
                    del buf.roots[0]
                elif best[1] == 0:
                    # LEFT-ARC: the head is b0
                    child = stack.roots.pop()
                    parent = buf.roots[0]
                    child.pred_parent_id = parent.id
                    child.pred_relation = best[0]
                    bestOp = 0
                    if self.rlMostFlag:
                        parent.lstms[bestOp + hoffset] = child.lstms[bestOp + hoffset]
                    if self.rlFlag:
                        parent.lstms[bestOp + hoffset] = child.vec
                elif best[1] == 1:
                    # RIGHT-ARC: the head is s0
                    child = stack.roots.pop()
                    parent = stack.roots[-1]
                    child.pred_parent_id = parent.id
                    child.pred_relation = best[0]
                    bestOp = 1
                    if self.rlMostFlag:
                        parent.lstms[bestOp + hoffset] = child.lstms[bestOp + hoffset]
                    if self.rlFlag:
                        parent.lstms[bestOp + hoffset] = child.vec

                if bestValid[2] < bestWrong[2] + 1.0:
                    # hinge loss between the best wrong and best valid transitions
                    loss = bestWrong[3] - bestValid[3]
                    mloss += 1.0 + bestWrong[2] - bestValid[2]
                    eloss += 1.0 + bestWrong[2] - bestValid[2]
                    errs.append(loss)

                if best[1] != 2 and (child.pred_parent_id != child.parent_id or
                                     child.pred_relation != child.relation):
                    # wrong head id or relation: count a labeled error
                    lerrors += 1
                    if child.pred_parent_id != child.parent_id:
                        # wrong head id: count an unlabeled error
                        errors += 1
                        eerrors += 1

                etotal += 1

            if len(errs) > 50:  # or True:
                #eerrs = ((esum(errs)) * (1.0/(float(len(errs)))))
                eerrs = esum(errs)
                scalar_loss = eerrs.scalar_value()
                eerrs.backward()
                self.trainer.update()
                errs = []
                lerrs = []
                renew_cg()
                self.Init()

        if len(errs) > 0:
            eerrs = (esum(errs))  # * (1.0/(float(len(errs))))
            eerrs.scalar_value()
            # backprop the loss to get gradients
            eerrs.backward()
            # update the parameters
            self.trainer.update()
            errs = []
            lerrs = []
            renew_cg()

        self.trainer.update()
        print "Loss: ", mloss / iSentence
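# A self-contained sketch (helper name is mine, not the repo's) of the
# arc-hybrid dynamic-oracle costs computed inline in the Train above. Each
# cost counts the gold arcs made unreachable by a transition: LEFT-ARC pops
# s0, losing any gold head of s0 in s1/beta and any gold dependent of s0 in
# b/beta; RIGHT-ARC and SHIFT are analogous.
def arc_hybrid_costs(s0, s1, b, alpha, beta):
    # every argument is a (possibly empty) list of entries with .id and .parent_id
    left_cost = (len([h for h in s1 + beta if h.id == s0[0].parent_id]) +
                 len([d for d in b + beta if d.parent_id == s0[0].id])) if s0 else 1
    right_cost = (len([h for h in b + beta if h.id == s0[0].parent_id]) +
                  len([d for d in b + beta if d.parent_id == s0[0].id])) if s0 else 1
    shift_cost = (len([h for h in s1 + alpha if h.id == b[0].parent_id]) +
                  len([d for d in s0 + s1 + alpha if d.parent_id == b[0].id])) if b else 1
    return (left_cost, right_cost, shift_cost, 1)  # trailing 1 is the padded "wrong" op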
def Predict(self, conll_path):
    with open(conll_path, 'r') as conllFP:
        for iSentence, sentence in enumerate(read_conll(conllFP, False)):
            self.Init()

            # In Chinese, words tagged 'X' are non-morpheme words and must be
            # filtered out or the program crashes; English has no such words.
            # The Chinese Penn Treebank train and dev sets contain no such
            # cases, but the test set contains a handful.
            conll_sentence = [entry for entry in sentence
                              if (isinstance(entry, utils.ConllEntry) and entry.pos != 'X')]
            conll_sentence = conll_sentence[1:] + [conll_sentence[0]]
            self.getWordEmbeddings(conll_sentence, False)
            # the stack starts out empty
            stack = ParseForest([])
            # the buffer starts out holding the whole sentence
            buf = ParseForest(conll_sentence)

            for root in conll_sentence:
                root.lstms = [root.vec for _ in xrange(self.nnvecs)]

            hoffset = 1 if self.headFlag else 0

            # loop until every dependency relation in the sentence has been found
            while not (len(buf) == 1 and len(stack) == 0):
                scores = self.__evaluate(stack, buf, False)
                best = max(chain(*scores), key=itemgetter(2))

                if best[1] == 2:
                    stack.roots.append(buf.roots[0])
                    del buf.roots[0]
                elif best[1] == 0:
                    child = stack.roots.pop()
                    parent = buf.roots[0]
                    child.pred_parent_id = parent.id
                    child.pred_relation = best[0]
                    bestOp = 0
                    if self.rlMostFlag:
                        parent.lstms[bestOp + hoffset] = child.lstms[bestOp + hoffset]
                    if self.rlFlag:
                        parent.lstms[bestOp + hoffset] = child.vec
                elif best[1] == 1:
                    child = stack.roots.pop()
                    parent = stack.roots[-1]
                    child.pred_parent_id = parent.id
                    child.pred_relation = best[0]
                    bestOp = 1
                    if self.rlMostFlag:
                        parent.lstms[bestOp + hoffset] = child.lstms[bestOp + hoffset]
                    if self.rlFlag:
                        parent.lstms[bestOp + hoffset] = child.vec

            renew_cg()
            yield sentence
def Train(self, conll_path, epoch):
    mloss = 0.0
    errors = 0
    batch = 0
    eloss = 0.0
    eerrors = 0
    lerrors = 0
    etotal = 0
    ltotal = 0
    ninf = -float('inf')

    hoffset = 1 if self.model.headFlag else 0

    start = time.time()

    fout = open('loss_coco_0001_epoch_%d.log' % epoch, 'w')
    with open(conll_path, 'r') as conllFP:
        shuffledData = list(read_conll(conllFP, True))
        random.shuffle(shuffledData)

        errs = []
        eeloss = 0.0

        self.model.Init()

        non_proj = 0
        for iSentence, sentence in enumerate(shuffledData):
            isProj = True
            if iSentence % 100 == 0 and iSentence != 0:
                print 'Processing sentence number:', iSentence, 'Loss:', eloss / etotal, 'Errors:', (float(eerrors)) / etotal, 'Labeled Errors:', (float(lerrors) / etotal), 'Time', time.time() - start
                fout.write(str(eloss / etotal) + '\n')
                start = time.time()
                del eerrors, eloss, etotal, lerrors, ltotal
                eerrors = 0
                eloss = 0.0
                etotal = 0
                lerrors = 0
                ltotal = 0

            conll_sentence = [entry for entry in sentence if isinstance(entry, utils.ConllEntry)]
            conll_sentence = conll_sentence[1:] + [conll_sentence[0]]

            if iSentence != 0:
                del sent_vec, lstms

            sent_vec = self.model.getWordEmbeddings(conll_sentence, True)
            lstms = []

            stack = ParseForest([])
            buf = ParseForest(conll_sentence)

            for i in range(len(sent_vec)):
                buf.roots[i].lstms = i
                lstms.append([sent_vec[i] for _ in xrange(self.model.nnvecs)])

            hoffset = 1 if self.model.headFlag else 0

            while not (len(buf) == 1 and len(stack) == 0):
                scores = self.model.evaluate(stack, buf, True, lstms)
                scores.append([(None, self.model.num_transitioins, ninf, None)])

                alpha = stack.roots[:-2] if len(stack) > 2 else []
                s1 = [stack.roots[-2]] if len(stack) > 1 else []
                s0 = [stack.roots[-1]] if len(stack) > 0 else []
                b = [buf.roots[0]] if len(buf) > 0 else []
                beta = buf.roots[1:] if len(buf) > 1 else []

                left_cost = (len([h for h in s1 + beta if h.id == s0[0].parent_id]) +
                             len([d for d in b + beta if d.parent_id == s0[0].id])) if len(scores[0]) > 0 else 1
                right_cost = (len([h for h in b + beta if h.id == s0[0].parent_id]) +
                              len([d for d in b + beta if d.parent_id == s0[0].id])) if len(scores[1]) > 0 else 1
                shift_cost = (len([h for h in s1 + alpha if h.id == b[0].parent_id]) +
                              len([d for d in s0 + s1 + alpha if d.parent_id == b[0].id])) if len(scores[2]) > 0 else 1
                reduce_cost = (len([h for h in s1 + b + beta if h.id == s0[0].parent_id]) +
                               len([d for d in b + beta if d.parent_id == s0[0].id])) if len(scores[3]) > 0 else 1

                if len(stack) > 0:
                    if s0[0].parent_id == -1 and reduce_cost == 0:
                        left_cost += 1
                        right_cost += 1
                        shift_cost += 1

                costs = (left_cost, right_cost, shift_cost, reduce_cost, 1)

                try:
                    bestValid = max((s for s in chain(*scores)
                                     if costs[s[1]] == 0 and (s[1] == 2 or s[0] == stack.roots[-1].relation or s[0] == None)),
                                    key=itemgetter(2))
                except:
                    print "length of stack: ", len(stack.roots)
                    print "This is non projective"
                    # exit() here would abort on the first non-projective
                    # sentence and make the counting below unreachable;
                    # count it and skip the sentence instead
                    non_proj += 1
                    isProj = False
                    break

                bestWrong = max((s for s in chain(*scores)
                                 if costs[s[1]] != 0 or (s[1] != 2 and s[0] != stack.roots[-1].relation)),
                                key=itemgetter(2))
                #best = bestValid if ((not self.model.oracle) or (bestValid[2] - bestWrong[2] > 1.0) or (bestValid[2] > bestWrong[2] and random.random() > 0.1)) else bestWrong
                best = bestValid

                if best[1] == 2:
                    stack.roots.append(buf.roots[0])
                    del buf.roots[0]
                elif best[1] == 0:
                    child = stack.roots.pop()
                    parent = buf.roots[0]
                    child.pred_parent_id = parent.id
                    child.pred_relation = best[0]
                    bestOp = 0
                    if self.model.rlMostFlag:
                        lstms[parent.lstms][bestOp + hoffset] = lstms[child.lstms][bestOp + hoffset]
                    if self.model.rlFlag:
                        lstms[parent.lstms][bestOp + hoffset] = sent_vec[child.lstms]
                elif best[1] == 1:
                    child = stack.roots.pop()
                    parent = stack.roots[-1]
                    child.pred_parent_id = parent.id
                    child.pred_relation = best[0]
                    bestOp = 1
                    if self.model.rlMostFlag:
                        lstms[parent.lstms][bestOp + hoffset] = lstms[child.lstms][bestOp + hoffset]
                    if self.model.rlFlag:
                        lstms[parent.lstms][bestOp + hoffset] = sent_vec[child.lstms]
                elif best[1] == 3:
                    # REDUCE: pop without attaching
                    child = stack.roots.pop()
                    child.pred_parent_id = -1
                    child.pred_relation = '_'
                    bestOp = 3

                if bestValid[2] < bestWrong[2] + 1.0:
                    loss = bestWrong[3] - bestValid[3]
                    mloss += 1.0 + bestWrong[2] - bestValid[2]
                    eloss += 1.0 + bestWrong[2] - bestValid[2]
                    errs.append(loss)
                    del loss

                if best[1] != 2 and (child.pred_parent_id != child.parent_id or
                                     child.pred_relation != child.relation):
                    lerrors += 1
                    if child.pred_parent_id != child.parent_id:
                        errors += 1
                        eerrors += 1

                etotal += 1
                del scores

            if len(errs) > 50:  # or True:
                eerrs = torch.sum(cat(errs))
                scalar_loss = get_data(eerrs).numpy()[0]
                eerrs.backward()
                self.trainer.step()
                del eerrs
                errs = []
                lerrs = []
                self.model.Init()
                self.trainer.zero_grad()

        if len(errs) > 0:
            eerrs = torch.sum(cat(errs))  # * (1.0/(float(len(errs))))
            get_data(eerrs).numpy()[0]
            eerrs.backward()
            self.trainer.step()
            del eerrs
            errs = []
            lerrs = []
            self.trainer.zero_grad()

    fout.close()
    print "Loss: ", mloss / iSentence
def Train(self, shuffledData):
    mloss = 0.0
    eloss = 0.0
    eerrors = 0
    lerrors = 0
    etotal = 0
    ninf = -float('inf')

    start = time.time()

    random.shuffle(shuffledData)
    print "Length of training data: ", len(shuffledData)

    errs = []

    self.Init()

    trainData = shuffledData
    if self.debug:
        trainData = shuffledData[:200]

    for iSentence, sentence in enumerate(trainData):
        if iSentence % 100 == 0 and iSentence != 0:
            loss_message = 'Processing sentence number: %d'%iSentence + \
                ' Loss: %.3f'%(eloss / etotal) + \
                ' Errors: %.3f'%((float(eerrors)) / etotal) + \
                ' Labeled Errors: %.3f'%(float(lerrors) / etotal) + \
                ' Time: %.2gs'%(time.time()-start)
            print loss_message
            start = time.time()
            eerrors = 0
            eloss = 0.0
            etotal = 0
            lerrors = 0

        conll_sentence = [entry for entry in sentence if isinstance(entry, utils.ConllEntry)]
        conll_sentence = conll_sentence[1:] + [conll_sentence[0]]
        self.getWordEmbeddings(conll_sentence, True)
        stack = ParseForest([])
        buf = ParseForest(conll_sentence)
        hoffset = 1 if self.headFlag else 0

        for root in conll_sentence:
            root.lstms = [root.vec] if self.headFlag else []
            root.lstms += [self.paddingVec for _ in range(self.nnvecs - hoffset)]
            root.relation = root.relation if root.relation in self.rels else 'runk'

        while not (len(buf) == 1 and len(stack) == 0):
            scores = self.__evaluate(stack, buf, True)

            #to ensure that we have at least one wrong operation
            scores.append([(None, 4, ninf, None)])

            stack_ids = [sitem.id for sitem in stack.roots]
            s1 = [stack.roots[-2]] if len(stack) > 1 else []
            s0 = [stack.roots[-1]] if len(stack) > 0 else []
            b = [buf.roots[0]] if len(buf) > 0 else []
            beta = buf.roots[1:] if len(buf) > 1 else []

            costs, shift_case = self.calculate_cost(scores, s0, s1, b, beta, stack_ids)

            bestValid = list(s for s in chain(*scores)
                             if costs[s[1]] == 0 and (s[1] == 2 or s[1] == 3 or s[0] == s0[0].relation))
            if len(bestValid) < 1:
                print "===============dropping a sentence==============="
                break
            bestValid = max(bestValid, key=itemgetter(2))

            bestWrong = max((s for s in chain(*scores)
                             if costs[s[1]] != 0 or (s[1] != 2 and s[1] != 3 and s[0] != s0[0].relation)),
                            key=itemgetter(2))

            #force swap
            if costs[3] == 0:
                best = bestValid
            else:
                #select a transition to follow
                # + aggressive exploration
                #1: might want to experiment with that parameter
                if bestWrong[1] == 3:
                    best = bestValid
                else:
                    best = bestValid if ((not self.oracle) or
                                         (bestValid[2] - bestWrong[2] > 1.0) or
                                         (bestValid[2] > bestWrong[2] and random.random() > 0.1)) else bestWrong

            if best[1] == 2:  #SHIFT
                if shift_case == 2:
                    if b[0].parent_entry.id in stack_ids[:-1] and b[0].id in b[0].parent_entry.rdeps:
                        b[0].parent_entry.rdeps.remove(b[0].id)
                    blocked_deps = [d for d in b[0].rdeps if d in stack_ids]
                    for d in blocked_deps:
                        b[0].rdeps.remove(d)
                stack.roots.append(buf.roots[0])
                del buf.roots[0]
            elif best[1] == 3:  #SWAP
                child = stack.roots.pop()
                buf.roots.insert(1, child)
            elif best[1] == 0:  #LEFT-ARC
                s0[0].rdeps = []
                if s0[0].id in s0[0].parent_entry.rdeps:
                    s0[0].parent_entry.rdeps.remove(s0[0].id)
                child = stack.roots.pop()
                parent = buf.roots[0]
                child.pred_parent_id = parent.id
                child.pred_relation = best[0]
            elif best[1] == 1:  #RIGHT-ARC
                s0[0].rdeps = []
                if s0[0].id in s0[0].parent_entry.rdeps:
                    s0[0].parent_entry.rdeps.remove(s0[0].id)
                child = stack.roots.pop()
                parent = stack.roots[-1]
                child.pred_parent_id = parent.id
                child.pred_relation = best[0]

            #update the representation of the head for attaching transitions
            if best[1] == 0 or best[1] == 1:
                #linear order
                if self.rlMostFlag:
                    parent.lstms[best[1] + hoffset] = child.lstms[best[1] + hoffset]
                #actual children
                if self.rlFlag:
                    parent.lstms[best[1] + hoffset] = child.vec

            if bestValid[2] < bestWrong[2] + 1.0:
                loss = bestWrong[3] - bestValid[3]
                mloss += 1.0 + bestWrong[2] - bestValid[2]
                eloss += 1.0 + bestWrong[2] - bestValid[2]
                errs.append(loss)

            #labeled errors
            if best[1] != 2 and best[1] != 3 and (child.pred_parent_id != child.parent_id or
                                                  child.pred_relation != child.relation):
                lerrors += 1
                #attachment error
                if child.pred_parent_id != child.parent_id:
                    eerrors += 1

            if best[1] == 0 or best[1] == 2:
                etotal += 1  #footnote 8 in Eli's original paper

        if len(errs) > 50:  # or True:
            eerrs = dy.esum(errs)
            scalar_loss = eerrs.scalar_value()  #forward
            eerrs.backward()
            self.trainer.update()
            errs = []
            lerrs = []
            dy.renew_cg()
            self.Init()

    if len(errs) > 0:
        eerrs = (dy.esum(errs))
        eerrs.scalar_value()
        eerrs.backward()
        self.trainer.update()
        errs = []
        lerrs = []
        dy.renew_cg()

    self.trainer.update()
    print "Loss: ", mloss / iSentence
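# A trivial sketch (an assumption for illustration, not repo code) of the
# margin rule shared by all the Train variants above: a loss term is only
# accumulated when the best valid transition fails to beat the best wrong one
# by a margin of 1.0, which is exactly the quantity added to mloss/eloss.
def margin_violation(best_valid_score, best_wrong_score, margin=1.0):
    gap = best_wrong_score + margin - best_valid_score
    return gap if gap > 0.0 else 0.0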
def forward(self, sentences, errs):
    tmp = time.time()
    self.getWordEmbeddings(sentences, True)
    self.ebd += time.time() - tmp
    dloss, deerrors, detotal = 0, 0, 0

    for sentence in sentences:
        stack = ParseForest([])
        buf = ParseForest(sentence)

        for root in sentence:
            root.lstms = [root.vec for _ in range(self.nnvecs)]

        hoffset = 1 if self.headFlag else 0

        while not (len(buf) == 1 and len(stack) == 0):
            tmp = time.time()
            scores = self.__evaluate(stack, buf, True)
            self.evl += time.time() - tmp
            scores.append([(None, 3, -np.inf, None)])

            alpha = stack.roots[:-2] if len(stack) > 2 else []
            s1 = [stack.roots[-2]] if len(stack) > 1 else []
            s0 = [stack.roots[-1]] if len(stack) > 0 else []
            b = [buf.roots[0]] if len(buf) > 0 else []
            beta = buf.roots[1:] if len(buf) > 1 else []

            left_cost = (len([h for h in s1 + beta if h.id == s0[0].parent_id]) +
                         len([d for d in b + beta if d.parent_id == s0[0].id])) if len(scores[0]) > 0 else 1
            right_cost = (len([h for h in b + beta if h.id == s0[0].parent_id]) +
                          len([d for d in b + beta if d.parent_id == s0[0].id])) if len(scores[1]) > 0 else 1
            shift_cost = (len([h for h in s1 + alpha if h.id == b[0].parent_id]) +
                          len([d for d in s0 + s1 + alpha if d.parent_id == b[0].id])) if len(scores[2]) > 0 else 1
            costs = (left_cost, right_cost, shift_cost, 1)

            bestValid = max((s for s in chain(*scores)
                             if costs[s[1]] == 0 and (s[1] == 2 or s[0] == stack.roots[-1].relation)),
                            key=itemgetter(2))
            bestWrong = max((s for s in chain(*scores)
                             if costs[s[1]] != 0 or (s[1] != 2 and s[0] != stack.roots[-1].relation)),
                            key=itemgetter(2))
            best = bestValid if ((not self.oracle) or
                                 (bestValid[2] - bestWrong[2] > 1.0) or
                                 (bestValid[2] > bestWrong[2] and random.random() > 0.1)) else bestWrong

            if best[1] == 2:
                stack.roots.append(buf.roots[0])
                del buf.roots[0]
            elif best[1] == 0:
                child = stack.roots.pop()
                parent = buf.roots[0]
                child.pred_parent_id = parent.id
                child.pred_relation = best[0]
                bestOp = 0
                if self.rlMostFlag:
                    parent.lstms[bestOp + hoffset] = child.lstms[bestOp + hoffset]
                if self.rlFlag:
                    parent.lstms[bestOp + hoffset] = child.vec
            elif best[1] == 1:
                child = stack.roots.pop()
                parent = stack.roots[-1]
                child.pred_parent_id = parent.id
                child.pred_relation = best[0]
                bestOp = 1
                if self.rlMostFlag:
                    parent.lstms[bestOp + hoffset] = child.lstms[bestOp + hoffset]
                if self.rlFlag:
                    parent.lstms[bestOp + hoffset] = child.vec

            if bestValid[2] < bestWrong[2] + 1.0:
                loss = bestWrong[3] - bestValid[3]
                dloss += 1.0 + bestWrong[2] - bestValid[2]
                errs.append(loss)

            if best[1] != 2 and (child.pred_parent_id != child.parent_id or
                                 child.pred_relation != child.relation):
                if child.pred_parent_id != child.parent_id:
                    deerrors += 1

            detotal += 1

    return dloss, deerrors, detotal
def Train(self, conll_path, options):
    mloss = 0.0
    errors = 0
    batch = 0
    eloss = 0.0
    etotal = 0
    max_quotient = float("-inf")
    min_quotient = float("inf")
    NUM_SAMPLES = options.num_samples  #default 10

    start = time.time()

    with open(conll_path, 'r') as conllFP:
        shuffledData = list(read_conll(conllFP, True))
        random.shuffle(shuffledData)

        errs = []
        batch_errs = []

        self.Init()

        for iSentence, sentence in enumerate(shuffledData):
            if iSentence % 100 == 0 and iSentence != 0:
                print 'Processing sentence number:', iSentence, 'Loss:', eloss / etotal, 'Time', time.time() - start
                start = time.time()
                eloss = 0.0
                etotal = 0

            sample_errs = []
            sample_quotients = []

            DEBUG = random.random() < 0.0001
            if DEBUG:
                print("Train sentence: {}".format([e.form for e in sentence]))

            for _ in xrange(NUM_SAMPLES):
                forest = ParseForest(sentence)
                self.getWordEmbeddings(forest, True)

                for root in forest.roots:
                    root.lstms = [self.builders[0].initial_state().add_input(root.vec),
                                  self.builders[1].initial_state().add_input(root.vec)]

                unassigned = {entry.id: sum([1 for pentry in sentence if pentry.parent_id == entry.id])
                              for entry in sentence}

                log_q_total = 0.0
                log_p_total = 0.0

                while len(forest.roots) > 1:
                    self.__evaluate(forest, True)  #NOTE(prkriley): this updates scores
                    roots = forest.roots
                    rootsIds = set([root.id for root in roots])

                    def _isValid(i):
                        return (unassigned[roots[i].id] == 0) and (
                            (i > 0 and roots[i].parent_id == roots[i - 1].id) or
                            (i < len(roots) - 1 and roots[i].parent_id == roots[i + 1].id))

                    valid_zs = [j for j in xrange(1, len(roots)) if _isValid(j)]
                    z_scores = concatenate([r.zexpr for r in roots[1:]])
                    valid_z_scores = concatenate([roots[j].zexpr for j in valid_zs])
                    p_zs = softmax(z_scores)

                    q_temperature = 16.0
                    q_zs = softmax(valid_z_scores * 1.0 / q_temperature)
                    q_zs_numpy = q_zs.npvalue()
                    q_zs_numpy /= np.sum(q_zs_numpy)

                    if DEBUG:
                        print("Valid z indices: {}".format(valid_zs))
                        print("Q(z): {}".format(q_zs_numpy))

                    valid_i = np.random.choice(len(valid_zs), p=q_zs_numpy)
                    q_z = pick(q_zs, valid_i)
                    i = valid_zs[valid_i]
                    log_q_total += log(q_z).scalar_value()

                    p_z = pick(p_zs, i - 1)
                    log_p_total += log(p_z).scalar_value()

                    irel = list(self.irels).index(roots[i].relation)
                    op = 0 if roots[i].parent_id == roots[i - 1].id else 1

                    #TODO(prkriley): verify correctness of this index math
                    presoftmax_p_y = [val for tup in roots[i].exprs for val in tup]
                    if i < len(roots) - 1:
                        neglog_p_y = pickneglogsoftmax(concatenate(presoftmax_p_y), irel * 2 + op)
                    else:
                        assert op == 0
                        presoftmax_p_y = presoftmax_p_y[::2]
                        neglog_p_y = pickneglogsoftmax(concatenate(presoftmax_p_y), irel)

                    neglog_p_z = pickneglogsoftmax(z_scores, i - 1)
                    errs.append(neglog_p_y + neglog_p_z)
                    log_p_total -= neglog_p_y.scalar_value()
                    mloss += neglog_p_y.scalar_value()
                    mloss += neglog_p_z.scalar_value()
                    etotal += 1

                    selectedChild = i
                    selectedIndex = i
                    selectedOp = op
                    selectedParent = i + [-1, 1][op]
                    selectedIRel = irel

                    for j in xrange(max(0, selectedIndex - self.k - 2),
                                    min(len(forest.roots), selectedIndex + self.k + 2)):
                        roots[j].scores = None

                    #NOTE(prkriley): counts number of real children that are still gettable
                    unassigned[roots[selectedChild].parent_id] -= 1

                    #NOTE(prkriley): I think lstms[0] is the right one, [1] is the left...
                    roots[selectedParent].lstms[selectedOp] = roots[selectedParent].lstms[selectedOp].add_input(
                        self.activation(self.lstm2lstm *
                                        noise(concatenate([roots[selectedChild].lstms[0].output(),
                                                           lookup(self.model["rels-lookup"], selectedIRel),
                                                           roots[selectedChild].lstms[1].output()]), 0.0) +
                                        self.lstm2lstmbias))

                    forest.Attach(selectedParent, selectedChild)
                #END OF SINGLE SAMPLE

                eerrs = ((esum(errs)) * (1.0 / (float(len(errs)))))  #TODO(prkriley): consider removing this division

                #scale by p/q, which is exp(logp - logq)
                pq_quotient = np.exp(log_p_total - log_q_total)
                scaled_pq_quotient = pq_quotient * 1e3
                max_quotient = max(scaled_pq_quotient, max_quotient)
                min_quotient = min(scaled_pq_quotient, min_quotient)

                eloss += eerrs.scalar_value()
                sample_errs.append(eerrs)
                sample_quotients.append(scaled_pq_quotient)
                errs = []
                DEBUG = False
            #END OF SAMPLING

            #SCALING QUOTIENTS
            sum_quotients = sum(sample_quotients)
            PQ_NORMALIZE_SUM = options.pq_norm
            scale = PQ_NORMALIZE_SUM / sum_quotients
            sample_quotients = [e * scale for e in sample_quotients]

            if options.use_pq:
                sample_errs = [e * q for (e, q) in zip(sample_errs, sample_quotients)]

            final_error = esum(sample_errs)
            if not options.use_pq:
                assert len(sample_errs) == NUM_SAMPLES
                final_error *= (1.0 / (float(len(sample_errs))))

            #accumulate final_error and update once we have a full batch
            batch_errs.append(final_error)
            if len(batch_errs) >= options.batch_size:
                total_error = esum(batch_errs)
                total_error.backward()
                self.trainer.update()
                batch_errs = []
                renew_cg()
                self.Init()
        #END OF EPOCH

    if options.use_pq:
        print("Max Quotient: {}; Min Quotient: {}".format(max_quotient, min_quotient))

    #TODO(prkriley): verify that AdamTrainer handles everything update_epoch did before
    print "Loss: ", mloss / (iSentence * NUM_SAMPLES)
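# A small numpy sketch (the function name is mine, not the repo's) of the
# importance-weight normalization done at the end of the sampled Train above:
# the per-sample quotients p/q = exp(log p - log q) are rescaled so that they
# sum to options.pq_norm before each sample's loss is weighted by its quotient.
import numpy as np

def normalize_quotients(log_ps, log_qs, pq_normalize_sum=1.0):
    quotients = np.exp(np.array(log_ps) - np.array(log_qs))
    return quotients * (pq_normalize_sum / quotients.sum())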
def Predict(self, conll_path):
    with open(conll_path, 'r') as conllFP:
        for iSentence, sentence in enumerate(read_conll(conllFP, False)):
            print("Sentence: {}".format([e.form for e in sentence]))
            self.Init()
            forest = ParseForest(sentence)
            self.getWordEmbeddings(forest, False)

            for root in forest.roots:
                root.lstms = [self.builders[0].initial_state().add_input(root.vec),
                              self.builders[1].initial_state().add_input(root.vec)]

            #NOTE(prkriley): looking at the truth here, but ONLY for reporting
            unassigned = {entry.id: sum([1 for pentry in sentence if pentry.parent_id == entry.id])
                          for entry in sentence}

            while len(forest.roots) > 1:
                self.__evaluate(forest, False)
                roots = forest.roots

                z_scores = concatenate([r.zexpr for r in roots[1:]])
                p_z = softmax(z_scores).npvalue()
                bestIndex = np.argmax(p_z) + 1
                print('P(z): {}'.format(p_z))
                print('Best index: {} ({})'.format(bestIndex, roots[bestIndex].form))

                valid_exprs = [val for tup in roots[bestIndex].exprs for val in tup]
                if bestIndex == len(roots) - 1:
                    valid_exprs = valid_exprs[::2]
                p_y = softmax(concatenate(valid_exprs))
                max_y_index = np.argmax(p_y.npvalue())  #NOTE(prkriley): don't need to actually do softmax just to pick the max

                if bestIndex < len(roots) - 1:
                    bestOp = max_y_index % 2
                    bestIRelation = (max_y_index - bestOp) / 2
                else:
                    bestOp = 0
                    bestIRelation = max_y_index

                #TODO(prkriley): make sure op is valid
                bestChild = bestIndex
                bestParent = bestIndex + [-1, 1][bestOp]
                bestRelation = self.irels[bestIRelation]

                #NOTE(prkriley): again, using the truth but only for reporting
                def _isValid(i):
                    return (unassigned[roots[i].id] == 0) and (
                        (i > 0 and roots[i].parent_id == roots[i - 1].id) or
                        (i < len(roots) - 1 and roots[i].parent_id == roots[i + 1].id))

                valid_zs = [j for j in xrange(1, len(roots)) if _isValid(j)]
                valid_probs = [p_z[j - 1] for j in valid_zs]
                invalid_probs = [p_z[j - 1] for j in xrange(1, len(roots)) if j not in valid_zs]
                avg_valid_prob = sum(valid_probs) * 1.0 / len(valid_probs) if valid_probs else -1
                avg_invalid_prob = sum(invalid_probs) * 1.0 / len(invalid_probs) if invalid_probs else -1
                print("Avg valid prob: {}/{} = {}".format(sum(valid_probs), len(valid_probs), avg_valid_prob))
                print("Avg invalid prob: {}/{} = {}".format(sum(invalid_probs), len(invalid_probs), avg_invalid_prob))

                #for j in xrange(max(0, bestIndex - self.k - 1), min(len(forest.roots), bestIndex + self.k + 2)):
                for j in xrange(max(0, bestIndex - self.k - 2),
                                min(len(forest.roots), bestIndex + self.k + 2)):
                    roots[j].scores = None

                roots[bestChild].pred_parent_id = forest.roots[bestParent].id
                roots[bestChild].pred_relation = bestRelation

                roots[bestParent].lstms[bestOp] = roots[bestParent].lstms[bestOp].add_input(
                    self.activation(self.lstm2lstmbias + self.lstm2lstm *
                                    concatenate([roots[bestChild].lstms[0].output(),
                                                 lookup(self.model["rels-lookup"], bestIRelation),
                                                 roots[bestChild].lstms[1].output()])))

                unassigned[roots[bestChild].parent_id] -= 1
                forest.Attach(bestParent, bestChild)

            renew_cg()
            yield sentence
def Predict(self, data, prefix=None):
    reached_max_swap = 0
    get_vectors = False
    if prefix:
        pref_idx = 0
        if type(prefix) == list:
            pf = prefix[pref_idx]
        else:
            pf = prefix
        fcemb = codecs.open(pf + '-char-emb.vec', 'w', encoding='utf-8')
        fwemb = codecs.open(pf + '-word-emb.vec', 'w', encoding='utf-8')
        fenc = codecs.open(pf + '-encoder.vec', 'w', encoding='utf-8')
        get_vectors = True

    lang_name = ''
    for iSentence, osentence in enumerate(data, 1):
        if type(prefix) == list:
            if iSentence == 1:
                lang_name = osentence[0].language_id
                print 'Extract feature:', pf, lang_name
            else:
                if lang_name != osentence[0].language_id:
                    fcemb.close()
                    fwemb.close()
                    fenc.close()
                    pref_idx += 1
                    pf = prefix[pref_idx]
                    fcemb = codecs.open(pf + '-char-emb.vec', 'w', encoding='utf-8')
                    fwemb = codecs.open(pf + '-word-emb.vec', 'w', encoding='utf-8')
                    fenc = codecs.open(pf + '-encoder.vec', 'w', encoding='utf-8')
                    lang_name = osentence[0].language_id
                    print 'Extract feature:', pf, lang_name

        sentence = deepcopy(osentence)
        reached_swap_for_i_sentence = False
        max_swap = 2 * len(sentence)
        iSwap = 0
        self.feature_extractor.Init()
        conll_sentence = [entry for entry in sentence if isinstance(entry, utils.ConllEntry)]
        conll_sentence = conll_sentence[1:] + [conll_sentence[0]]
        data_vec = self.feature_extractor.getWordEmbeddings(conll_sentence, False, get_vectors=get_vectors)
        if get_vectors:
            for dat in data_vec:
                # flabel.write(str(dat[0]) + '\t' + dat[1] + '\t' + dat[2] + '\t' + dat[3] + '\n')
                fcemb.write(','.join([str(x) for x in dat[4]]) + '\n')
                fwemb.write(','.join([str(x) for x in dat[5]]) + '\n')
                fenc.write(','.join([str(x) for x in dat[6]]) + '\n')

        stack = ParseForest([])
        buf = ParseForest(conll_sentence)
        hoffset = 1 if self.headFlag else 0
        lang = conll_sentence[1].language_id

        for root in conll_sentence:
            root.lstms = [root.vec] if self.headFlag else []
            if not self.feature_extractor.multiling or self.feature_extractor.shareWordLookup:
                root.lstms += [self.feature_extractor.paddingVec for _ in range(self.nnvecs - hoffset)]
            else:
                root.lstms += [self.feature_extractor.paddingVecs[lang] for _ in range(self.nnvecs - hoffset)]

        while not (len(buf) == 1 and len(stack) == 0):
            scores = self.__evaluate(stack, buf, False)
            best = max(chain(*(scores if iSwap < max_swap else scores[:3])), key=itemgetter(2))
            if iSwap == max_swap and not reached_swap_for_i_sentence:
                reached_max_swap += 1
                reached_swap_for_i_sentence = True
                print "reached max swap in %d out of %d sentences" % (reached_max_swap, iSentence)
            self.apply_transition(best, stack, buf, hoffset)
            if best[1] == 3:  #SWAP
                iSwap += 1

        dy.renew_cg()

        #keep in memory the information we need, not all the vectors
        oconll_sentence = [entry for entry in osentence if isinstance(entry, utils.ConllEntry)]
        oconll_sentence = oconll_sentence[1:] + [oconll_sentence[0]]
        for tok_o, tok in zip(oconll_sentence, conll_sentence):
            tok_o.pred_relation = tok.pred_relation
            tok_o.pred_parent_id = tok.pred_parent_id
        yield osentence

    if prefix:
        fcemb.close()
        fwemb.close()
        fenc.close()
def Train(self, conll_path, epoch):
    mloss = 0.0
    errors = 0
    batch = 0
    eloss = 0.0
    eerrors = 0
    lerrors = 0
    etotal = 0
    ltotal = 0
    ninf = -float('inf')

    hoffset = 1 if self.headFlag else 0

    start1 = start = time.time()

    onlyNonProjectives = True
    with open(conll_path, 'r') as conllFP:
        shuffledData = list(read_conll(conllFP, onlyNonProjectives))
        random.shuffle(shuffledData)

        errs = []
        eeloss = 0.0

        self.Init()

        numOfSent = len(shuffledData)
        displayFreq = 500
        if numOfSent < 2000:
            displayFreq = 200

        for iSentence, sentence in enumerate(shuffledData):
            if iSentence % displayFreq == 0 and iSentence != 0:
                timeSpent = time.time() - start
                totalTimeSpent = time.time() - start1
                timeToGo = totalTimeSpent * (numOfSent - iSentence) / iSentence
                print 'Epoch: %2d sentence number: %6d/%d Loss: %.5f Errors: %.5f Labeled Errors: %.5f Time: %.1f s, total: %.1f s ETA: %.1f s' \
                    % (epoch+1, iSentence, numOfSent, (eloss / etotal), (float(eerrors) / etotal), (float(lerrors) / etotal), timeSpent, totalTimeSpent, timeToGo)
                start = time.time()
                eerrors = 0
                eloss = 0.0
                etotal = 0
                lerrors = 0
                ltotal = 0

            if self.use_root:
                # keep the "*root*" node added by read_conll (can lead to several roots)
                sentence = sentence[1:] + [sentence[0]]
            else:
                sentence = sentence[1:]

            self.getWordEmbeddings(sentence, True)
            stack = ParseForest([])
            buf = ParseForest(sentence)

            for root in sentence:
                root.lstms = [root.vec for _ in xrange(self.nnvecs)]

            hoffset = 1 if self.headFlag else 0

            while len(buf) > 0 or len(stack) > 1:
                scores = self.__evaluate(stack, buf, True)
                scores.append([(None, 3, ninf, None)])

                alpha = stack.roots[:-2] if len(stack) > 2 else []
                s1 = [stack.roots[-2]] if len(stack) > 1 else []
                s0 = [stack.roots[-1]] if len(stack) > 0 else []
                b = [buf.roots[0]] if len(buf) > 0 else []
                beta = buf.roots[1:] if len(buf) > 1 else []

                left_cost = (len([h for h in s1 + beta if h.id == s0[0].parent_id]) +
                             len([d for d in b + beta if d.parent_id == s0[0].id])) if len(scores[0]) > 0 else 1
                right_cost = (len([h for h in b + beta if h.id == s0[0].parent_id]) +
                              len([d for d in b + beta if d.parent_id == s0[0].id])) if len(scores[1]) > 0 else 1
                shift_cost = (len([h for h in s1 + alpha if h.id == b[0].parent_id]) +
                              len([d for d in s0 + s1 + alpha if d.parent_id == b[0].id])) if len(scores[2]) > 0 else 1
                costs = (left_cost, right_cost, shift_cost, 1)

                bestOK = True
                try:
                    bestValid = max((s for s in chain(*scores)
                                     if costs[s[1]] == 0 and (s[1] == 2 or s[0] == stack.roots[-1].relation)),
                                    key=itemgetter(2))
                    # print "best", bestValid
                except:
                    bestOK = False
                try:
                    bestWrong = max((s for s in chain(*scores)
                                     if costs[s[1]] != 0 or (s[1] != 2 and s[0] != stack.roots[-1].relation)),
                                    key=itemgetter(2))
                except:
                    # (printing bestWrong here would raise a NameError: it is unbound when max() fails)
                    print "no bestWrong candidate"
                    bestOK = False

                # bestValid or bestWrong may fail when chain(*scores) gives an empty list
                # in this (rare) case we keep the last best
                # Will crash if the first word has an empty list
                if bestOK:
                    best = bestValid if ((not self.oracle) or
                                         (bestValid[2] - bestWrong[2] > 1.0) or
                                         (bestValid[2] > bestWrong[2] and random.random() > 0.1)) else bestWrong

                if best[1] == 2:  # we learned a SHIFT
                    stack.roots.append(buf.roots[0])
                    del buf.roots[0]
                elif best[1] == 0:  # we learned a LEFT-ARC
                    child = stack.roots.pop()
                    parent = buf.roots[0]
                    child.pred_parent_id = parent.id
                    child.pred_relation = best[0]
                    bestOp = 0
                    if self.rlMostFlag:
                        parent.lstms[bestOp + hoffset] = child.lstms[bestOp + hoffset]
                    if self.rlFlag:
                        parent.lstms[bestOp + hoffset] = child.vec
                elif best[1] == 1:  # we learned a RIGHT-ARC
                    child = stack.roots.pop()
                    parent = stack.roots[-1]
                    child.pred_parent_id = parent.id
                    child.pred_relation = best[0]
                    bestOp = 1
                    if self.rlMostFlag:
                        parent.lstms[bestOp + hoffset] = child.lstms[bestOp + hoffset]
                    if self.rlFlag:
                        parent.lstms[bestOp + hoffset] = child.vec

                if bestValid[2] < bestWrong[2] + 1.0:
                    loss = bestWrong[3] - bestValid[3]
                    mloss += 1.0 + bestWrong[2] - bestValid[2]
                    eloss += 1.0 + bestWrong[2] - bestValid[2]
                    errs.append(loss)

                if best[1] != 2 and (child.pred_parent_id != child.parent_id or
                                     child.pred_relation != child.relation):
                    lerrors += 1
                    if child.pred_parent_id != child.parent_id:
                        errors += 1
                        eerrors += 1

                etotal += 1

            if len(errs) > 50:  # or True:
                #eerrs = ((esum(errs)) * (1.0/(float(len(errs)))))
                eerrs = esum(errs)
                scalar_loss = eerrs.scalar_value()
                eerrs.backward()
                self.trainer.update()
                errs = []
                lerrs = []
                renew_cg()
                self.Init()

        if len(errs) > 0:
            eerrs = (esum(errs))  # * (1.0/(float(len(errs))))
            eerrs.scalar_value()
            eerrs.backward()
            self.trainer.update()
            errs = []
            lerrs = []
            renew_cg()

        self.trainer.update_epoch()
        print "Loss: %.4f time spent in epoch %.1f min" % (mloss/iSentence, (time.time()-start1)/60)
def Predict(self, conll_path, is_string=False):
    conllFP = None
    if is_string:
        conllFP = StringIO.StringIO(conll_path)
    else:
        conllFP = open(conll_path, 'r')

    if conllFP:
        for iSentence, sentence in enumerate(read_conll(conllFP, False)):
            self.Init()
            if self.use_root:
                # keep the "*root*" node (can lead to several roots)
                sentence = sentence[1:] + [sentence[0]]
            else:
                sentence = sentence[1:]

            self.getWordEmbeddings(sentence, False)
            stack = ParseForest([])
            buf = ParseForest(sentence)

            for root in sentence:
                root.lstms = [root.vec for _ in xrange(self.nnvecs)]

            hoffset = 1 if self.headFlag else 0

            cttrans = 0  # transition counter
            while len(buf) > 0 or len(stack) > 1:
                scores = self.__evaluate(stack, buf, False)
                best = max(chain(*scores), key=itemgetter(2))
                cttrans += 1

                # transitions
                if best[1] == 2:  # SHIFT
                    stack.roots.append(buf.roots[0])
                    del buf.roots[0]
                elif best[1] == 0:  # LEFT-ARC
                    child = stack.roots.pop()
                    parent = buf.roots[0]
                    child.pred_parent_id = parent.id
                    child.pred_relation = best[0]
                    bestOp = 0
                    if self.rlMostFlag:
                        parent.lstms[bestOp + hoffset] = child.lstms[bestOp + hoffset]
                    if self.rlFlag:
                        parent.lstms[bestOp + hoffset] = child.vec
                elif best[1] == 1:  # RIGHT-ARC
                    child = stack.roots.pop()
                    parent = stack.roots[-1]
                    child.pred_parent_id = parent.id
                    child.pred_relation = best[0]
                    bestOp = 1
                    if self.rlMostFlag:
                        parent.lstms[bestOp + hoffset] = child.lstms[bestOp + hoffset]
                    if self.rlFlag:
                        parent.lstms[bestOp + hoffset] = child.vec

            renew_cg()

            if self.use_root:
                yield [sentence[-1]] + sentence[:-1]
            else:
                # write_conll drops the first word, so something must go in that slot
                yield [sentence[-1]] + sentence
def Train(self, conll_path):
    mloss = 0.0
    errors = 0
    batch = 0
    eloss = 0.0
    eerrors = 0
    lerrors = 0
    etotal = 0
    ltotal = 0

    start = time.time()

    with open(conll_path, 'r') as conllFP:
        shuffledData = list(read_conll(conllFP, True))
        random.shuffle(shuffledData)

        errs = []
        eeloss = 0.0

        self.Init()

        for iSentence, sentence in enumerate(shuffledData):
            if iSentence % 100 == 0 and iSentence != 0:
                print 'Processing sentence number:', iSentence, 'Loss:', eloss / etotal, 'Errors:', (float(eerrors)) / etotal, 'Labeled Errors:', (float(lerrors) / etotal), 'Time', time.time() - start
                start = time.time()
                eerrors = 0
                eloss = 0.0
                etotal = 0
                lerrors = 0
                ltotal = 0

            forest = ParseForest(sentence)
            self.getWordEmbeddings(forest, True)

            for root in forest.roots:
                root.lstms = [self.builders[0].initial_state().add_input(root.vec),
                              self.builders[1].initial_state().add_input(root.vec)]

            unassigned = {entry.id: sum([1 for pentry in sentence if pentry.parent_id == entry.id])
                          for entry in sentence}

            while len(forest.roots) > 1:
                self.__evaluate(forest, True)

                bestValidOp, bestValidScore = None, float("-inf")
                bestWrongOp, bestWrongScore = None, float("-inf")
                bestValidParent, bestValidChild = None, None
                bestValidIndex, bestWrongIndex = None, None
                roots = forest.roots
                rootsIds = set([root.id for root in roots])

                for i in xrange(len(forest.roots) - 1):
                    for irel, rel in enumerate(self.irels):
                        for op in xrange(2):
                            child = i + (1 - op)
                            parent = i + op

                            oracleCost = unassigned[roots[child].id] + \
                                (0 if roots[child].parent_id not in rootsIds or
                                      roots[child].parent_id == roots[parent].id else 1)

                            if oracleCost == 0 and (roots[child].parent_id != roots[parent].id or
                                                    roots[child].relation == rel):
                                if bestValidScore < forest.roots[i].scores[irel][op]:
                                    bestValidScore = forest.roots[i].scores[irel][op]
                                    bestValidOp = op
                                    bestValidParent, bestValidChild = parent, child
                                    bestValidIndex = i
                                    bestValidIRel, bestValidRel = irel, rel
                                    bestValidExpr = roots[bestValidIndex].exprs[bestValidIRel][bestValidOp]
                            elif bestWrongScore < forest.roots[i].scores[irel][op]:
                                bestWrongScore = forest.roots[i].scores[irel][op]
                                bestWrongParent, bestWrongChild = parent, child
                                bestWrongOp = op
                                bestWrongIndex = i
                                bestWrongIRel, bestWrongRel = irel, rel
                                bestWrongExpr = roots[bestWrongIndex].exprs[bestWrongIRel][bestWrongOp]

                if bestValidScore < bestWrongScore + 1.0:
                    loss = bestWrongExpr - bestValidExpr
                    mloss += 1.0 + bestWrongScore - bestValidScore
                    eloss += 1.0 + bestWrongScore - bestValidScore
                    errs.append(loss)

                if not self.oracle or bestValidScore - bestWrongScore > 1.0 or \
                        (bestValidScore > bestWrongScore and random.random() > 0.1):
                    selectedOp = bestValidOp
                    selectedParent = bestValidParent
                    selectedChild = bestValidChild
                    selectedIndex = bestValidIndex
                    selectedIRel, selectedRel = bestValidIRel, bestValidRel
                else:
                    selectedOp = bestWrongOp
                    selectedParent = bestWrongParent
                    selectedChild = bestWrongChild
                    selectedIndex = bestWrongIndex
                    selectedIRel, selectedRel = bestWrongIRel, bestWrongRel

                if roots[selectedChild].parent_id != roots[selectedParent].id or \
                        selectedRel != roots[selectedChild].relation:
                    lerrors += 1
                    if roots[selectedChild].parent_id != roots[selectedParent].id:
                        errors += 1
                        eerrors += 1

                etotal += 1

                for j in xrange(max(0, selectedIndex - self.k - 1),
                                min(len(forest.roots), selectedIndex + self.k + 2)):
                    roots[j].scores = None

                unassigned[roots[selectedChild].parent_id] -= 1

                roots[selectedParent].lstms[selectedOp] = roots[selectedParent].lstms[selectedOp].add_input(
                    self.activation(self.lstm2lstm *
                                    noise(concatenate([roots[selectedChild].lstms[0].output(),
                                                       lookup(self.model["rels-lookup"], selectedIRel),
                                                       roots[selectedChild].lstms[1].output()]), 0.0) +
                                    self.lstm2lstmbias))

                forest.Attach(selectedParent, selectedChild)

            if len(errs) > 50.0:
                eerrs = ((esum(errs)) * (1.0 / (float(len(errs)))))
                scalar_loss = eerrs.scalar_value()
                eerrs.backward()
                self.trainer.update()
                errs = []
                lerrs = []
                renew_cg()
                self.Init()

        if len(errs) > 0:
            eerrs = (esum(errs)) * (1.0 / (float(len(errs))))
            eerrs.scalar_value()
            eerrs.backward()
            self.trainer.update()
            errs = []
            lerrs = []
            renew_cg()

        self.trainer.update_epoch()
        print "Loss: ", mloss / iSentence
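# A compact sketch (the helper name is mine, not repo code) of the easy-first
# oracle test used in the Train above: attaching child -> parent costs nothing
# only when the child has already collected all of its own dependents and its
# gold head is either the chosen parent or no longer among the remaining roots.
def zero_cost_attachment(child, parent, unassigned, roots_ids):
    return (unassigned[child.id] == 0 and
            (child.parent_id not in roots_ids or child.parent_id == parent.id))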
def Train(self, trainData):
    mloss = 0.0
    eloss = 0.0
    eerrors = 0
    lerrors = 0
    etotal = 0
    ninf = -float('inf')

    beg = time.time()
    start = time.time()

    random.shuffle(trainData)  # in certain cases the data will already have been shuffled after being read from file or while creating dev data
    print "Length of training data: ", len(trainData)

    errs = []

    self.feature_extractor.Init()

    for iSentence, sentence in enumerate(trainData, 1):
        if iSentence % 100 == 0:
            loss_message = 'Processing sentence number: %d'%iSentence + \
                ' Loss: %.3f'%(eloss / etotal) + \
                ' Errors: %.3f'%((float(eerrors)) / etotal) + \
                ' Labeled Errors: %.3f'%(float(lerrors) / etotal) + \
                ' Time: %.2gs'%(time.time()-start)
            print loss_message
            start = time.time()
            eerrors = 0
            eloss = 0.0
            etotal = 0
            lerrors = 0

        sentence = deepcopy(sentence)  # ensures we are working with a clean copy of the sentence and allows memory to be recycled each time round the loop
        conll_sentence = [entry for entry in sentence if isinstance(entry, utils.ConllEntry)]
        conll_sentence = conll_sentence[1:] + [conll_sentence[0]]
        self.feature_extractor.getWordEmbeddings(conll_sentence, True)
        stack = ParseForest([])
        buf = ParseForest(conll_sentence)
        hoffset = 1 if self.headFlag else 0
        lang = conll_sentence[1].language_id

        for root in conll_sentence:
            root.lstms = [root.vec] if self.headFlag else []
            if not self.feature_extractor.multiling or self.feature_extractor.shareWordLookup:
                root.lstms += [self.feature_extractor.paddingVec for _ in range(self.nnvecs - hoffset)]
            else:
                root.lstms += [self.feature_extractor.paddingVecs[lang] for _ in range(self.nnvecs - hoffset)]

        while not (len(buf) == 1 and len(stack) == 0):
            scores = self.__evaluate(stack, buf, True)

            #to ensure that we have at least one wrong operation
            scores.append([(None, 4, ninf, None)])

            stack_ids = [sitem.id for sitem in stack.roots]
            s1 = [stack.roots[-2]] if len(stack) > 1 else []
            s0 = [stack.roots[-1]] if len(stack) > 0 else []
            b = [buf.roots[0]] if len(buf) > 0 else []
            beta = buf.roots[1:] if len(buf) > 1 else []

            costs, shift_case = self.calculate_cost(scores, s0, s1, b, beta, stack_ids)

            bestValid = list(s for s in chain(*scores)
                             if costs[s[1]] == 0 and (s[1] == 2 or s[1] == 3 or s[0] == s0[0].relation))
            if len(bestValid) < 1:
                print "===============dropping a sentence==============="
                break
            bestValid = max(bestValid, key=itemgetter(2))

            bestWrong = max((s for s in chain(*scores)
                             if costs[s[1]] != 0 or (s[1] != 2 and s[1] != 3 and s[0] != s0[0].relation)),
                            key=itemgetter(2))

            #force swap
            if costs[3] == 0:
                best = bestValid
            else:
                #select a transition to follow
                # + aggressive exploration
                if bestWrong[1] == 3:
                    best = bestValid
                else:
                    best = bestValid if ((not self.oracle) or
                                         (bestValid[2] - bestWrong[2] > 1.0) or
                                         (bestValid[2] > bestWrong[2] and random.random() > 0.1)) else bestWrong

            #updates for the dynamic oracle
            if best[1] == 2:  #SHIFT
                if shift_case == 2:
                    if b[0].parent_entry.id in stack_ids[:-1] and b[0].id in b[0].parent_entry.rdeps:
                        b[0].parent_entry.rdeps.remove(b[0].id)
                    blocked_deps = [d for d in b[0].rdeps if d in stack_ids]
                    for d in blocked_deps:
                        b[0].rdeps.remove(d)
            elif best[1] == 0 or best[1] == 1:  #LA or RA
                child = s0[0]
                s0[0].rdeps = []
                if s0[0].id in s0[0].parent_entry.rdeps:
                    s0[0].parent_entry.rdeps.remove(s0[0].id)

            self.apply_transition(best, stack, buf, hoffset)

            if bestValid[2] < bestWrong[2] + 1.0:
                loss = bestWrong[3] - bestValid[3]
                mloss += 1.0 + bestWrong[2] - bestValid[2]
                eloss += 1.0 + bestWrong[2] - bestValid[2]
                errs.append(loss)

            #labeled errors
            if best[1] != 2 and best[1] != 3 and (child.pred_parent_id != child.parent_id or
                                                  child.pred_relation != child.relation):
                lerrors += 1
                #attachment error
                if child.pred_parent_id != child.parent_id:
                    eerrors += 1

            if best[1] == 0 or best[1] == 2:
                etotal += 1  #footnote 8 in Eli's original paper

        if len(errs) > 50:  # or True:
            eerrs = dy.esum(errs)
            scalar_loss = eerrs.scalar_value()  #forward
            eerrs.backward()
            self.trainer.update()
            errs = []
            lerrs = []
            dy.renew_cg()
            self.feature_extractor.Init()

    if len(errs) > 0:
        eerrs = (dy.esum(errs))
        eerrs.scalar_value()
        eerrs.backward()
        self.trainer.update()
        errs = []
        lerrs = []
        dy.renew_cg()

    self.trainer.update()
    print "Loss: ", mloss / iSentence
    print "Total Training Time: %.2gs" % (time.time() - beg)
def Predict(self, treebanks, datasplit, options):
    reached_max_swap = 0
    char_map = {}
    if options.char_map_file:
        char_map_fh = open(options.char_map_file, encoding='utf-8')
        char_map = json.loads(char_map_fh.read())

    # should probably use a namedtuple in get_vocab to make this prettier
    _, test_words, test_chars, _, _, _, test_treebanks, test_langs = utils.get_vocab(
        treebanks, datasplit, char_map)

    # get external embeddings for the set of words and chars in the
    # test vocab but not in the training vocab
    test_embeddings = defaultdict(lambda: {})
    if options.word_emb_size > 0 and options.ext_word_emb_file:
        new_test_words = set(test_words) - self.feature_extractor.words.keys()
        print("Number of OOV word types at test time: %i (out of %i)" %
              (len(new_test_words), len(test_words)))
        if len(new_test_words) > 0:
            # no point loading embeddings if there are no words to look for
            for lang in test_langs:
                embeddings = utils.get_external_embeddings(
                    options,
                    emb_file=options.ext_word_emb_file,
                    lang=lang,
                    words=new_test_words)
                test_embeddings["words"].update(embeddings)
            if len(test_langs) > 1 and test_embeddings["words"]:
                print("External embeddings found for %i words (out of %i)" %
                      (len(test_embeddings["words"]), len(new_test_words)))

    if options.char_emb_size > 0:
        new_test_chars = set(test_chars) - self.feature_extractor.chars.keys()
        print("Number of OOV char types at test time: %i (out of %i)" %
              (len(new_test_chars), len(test_chars)))
        if len(new_test_chars) > 0:
            for lang in test_langs:
                embeddings = utils.get_external_embeddings(
                    options,
                    emb_file=options.ext_char_emb_file,
                    lang=lang,
                    words=new_test_chars,
                    chars=True)
                test_embeddings["chars"].update(embeddings)
            if len(test_langs) > 1 and test_embeddings["chars"]:
                print("External embeddings found for %i chars (out of %i)" %
                      (len(test_embeddings["chars"]), len(new_test_chars)))

    data = utils.read_conll_dir(treebanks, datasplit, char_map=char_map)
    for iSentence, osentence in enumerate(data, 1):
        sentence = deepcopy(osentence)
        reached_swap_for_i_sentence = False
        max_swap = 2 * len(sentence)
        iSwap = 0
        self.feature_extractor.Init(options)
        conll_sentence = [entry for entry in sentence if isinstance(entry, utils.ConllEntry)]
        conll_sentence = conll_sentence[1:] + [conll_sentence[0]]
        self.feature_extractor.getWordEmbeddings(conll_sentence, False, options, test_embeddings)
        stack = ParseForest([])
        buf = ParseForest(conll_sentence)
        hoffset = 1 if self.headFlag else 0

        for root in conll_sentence:
            #empty = dy.zeros(2*options.lstm_output_size)
            root.lstms = [root.vec] if self.headFlag else []
            root.lstms += [root.vec for _ in range(self.nnvecs - hoffset)]
            root.relation = root.relation if root.relation in self.irels else 'runk'

        while not (len(buf) == 1 and len(stack) == 0):
            scores = self.__evaluate(stack, buf, False)
            best = max(chain(*(scores if iSwap < max_swap else scores[:3])), key=itemgetter(2))
            if iSwap == max_swap and not reached_swap_for_i_sentence:
                reached_max_swap += 1
                reached_swap_for_i_sentence = True
                print("reached max swap in %d out of %d sentences" %
                      (reached_max_swap, iSentence))
            self.apply_transition(best, stack, buf, hoffset)
            if best[1] == SWAP:
                iSwap += 1

        dy.renew_cg()

        #keep in memory the information we need, not all the vectors
        oconll_sentence = [entry for entry in osentence if isinstance(entry, utils.ConllEntry)]
        oconll_sentence = oconll_sentence[1:] + [oconll_sentence[0]]
        for tok_o, tok in zip(oconll_sentence, conll_sentence):
            tok_o.pred_relation = tok.pred_relation
            tok_o.pred_parent_id = tok.pred_parent_id
        yield osentence
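# The OOV pattern shared by both Predict variants above, as a one-line helper
# (an illustrative assumption, not repo code): external embeddings are only
# looked up for test-vocabulary items that were unseen during training.
def collect_oov(test_vocab, train_vocab):
    return set(test_vocab) - set(train_vocab)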
def Train(self, conll_path):
    mloss = 0.0
    errors = 0
    batch = 0
    eloss = 0.0
    eerrors = 0
    lerrors = 0
    etotal = 0
    ltotal = 0
    ninf = -float('inf')

    hoffset = 1 if self.headFlag else 0

    start = time.time()

    with open(conll_path, 'r') as conllFP:
        shuffledData = list(read_conll(conllFP, True))
        random.shuffle(shuffledData)

        errs = []
        eeloss = 0.0

        self.Init()

        for iSentence, sentence in enumerate(shuffledData):
            if iSentence % 100 == 0 and iSentence != 0:
                print 'Processing sentence number:', iSentence, \
                    'Loss:', eloss / etotal, \
                    'Errors:', (float(eerrors)) / etotal, \
                    'Labeled Errors:', (float(lerrors) / etotal), \
                    'Time', time.time() - start
                start = time.time()
                eerrors = 0
                eloss = 0.0
                etotal = 0
                lerrors = 0
                ltotal = 0

            conll_sentence = [entry for entry in sentence
                              if isinstance(entry, utils.ConllEntry)]
            conll_sentence = conll_sentence[1:] + [conll_sentence[0]]

            self.getWordEmbeddings(conll_sentence, True)
            stack = ParseForest([])
            buf = ParseForest(conll_sentence)

            for root in conll_sentence:
                root.lstms = [root.vec for _ in xrange(self.nnvecs)]

            hoffset = 1 if self.headFlag else 0

            while not (len(buf) == 1 and len(stack) == 0):
                scores = self.__evaluate(stack, buf, True)
                #ensure there is always at least one wrong transition to compare against
                scores.append([(None, 3, ninf, None)])

                alpha = stack.roots[:-2] if len(stack) > 2 else []
                s1 = [stack.roots[-2]] if len(stack) > 1 else []
                s0 = [stack.roots[-1]] if len(stack) > 0 else []
                b = [buf.roots[0]] if len(buf) > 0 else []
                beta = buf.roots[1:] if len(buf) > 1 else []

                #arc-hybrid dynamic-oracle costs: how many gold arcs each
                #transition makes unreachable (see the standalone sketch
                #after this method)
                left_cost = (len([h for h in s1 + beta if h.id == s0[0].parent_id]) +
                             len([d for d in b + beta if d.parent_id == s0[0].id])
                             ) if len(scores[0]) > 0 else 1
                right_cost = (len([h for h in b + beta if h.id == s0[0].parent_id]) +
                              len([d for d in b + beta if d.parent_id == s0[0].id])
                              ) if len(scores[1]) > 0 else 1
                shift_cost = (len([h for h in s1 + alpha if h.id == b[0].parent_id]) +
                              len([d for d in s0 + s1 + alpha if d.parent_id == b[0].id])
                              ) if len(scores[2]) > 0 else 1
                costs = (left_cost, right_cost, shift_cost, 1)

                bestValid = max((s for s in chain(*scores)
                                 if costs[s[1]] == 0 and
                                 (s[1] == 2 or s[0] == stack.roots[-1].relation)),
                                key=itemgetter(2))
                bestWrong = max((s for s in chain(*scores)
                                 if costs[s[1]] != 0 or
                                 (s[1] != 2 and s[0] != stack.roots[-1].relation)),
                                key=itemgetter(2))
                #aggressive exploration: with the dynamic oracle enabled,
                #occasionally follow a wrong transition whose score is close
                #to the best valid one
                best = bestValid if ((not self.oracle) or
                                     (bestValid[2] - bestWrong[2] > 1.0) or
                                     (bestValid[2] > bestWrong[2] and
                                      random.random() > 0.1)) else bestWrong

                if best[1] == 2: #SHIFT
                    stack.roots.append(buf.roots[0])
                    del buf.roots[0]
                elif best[1] == 0: #LEFT-ARC
                    child = stack.roots.pop()
                    parent = buf.roots[0]
                    child.pred_parent_id = parent.id
                    child.pred_relation = best[0]
                    bestOp = 0
                    if self.rlMostFlag:
                        parent.lstms[bestOp + hoffset] = child.lstms[bestOp + hoffset]
                    if self.rlFlag:
                        parent.lstms[bestOp + hoffset] = child.vec
                elif best[1] == 1: #RIGHT-ARC
                    child = stack.roots.pop()
                    parent = stack.roots[-1]
                    child.pred_parent_id = parent.id
                    child.pred_relation = best[0]
                    bestOp = 1
                    if self.rlMostFlag:
                        parent.lstms[bestOp + hoffset] = child.lstms[bestOp + hoffset]
                    if self.rlFlag:
                        parent.lstms[bestOp + hoffset] = child.vec

                if bestValid[2] < bestWrong[2] + 1.0:
                    loss = bestWrong[3] - bestValid[3]
                    mloss += 1.0 + bestWrong[2] - bestValid[2]
                    eloss += 1.0 + bestWrong[2] - bestValid[2]
                    errs.append(loss)

                if best[1] != 2 and (child.pred_parent_id != child.parent_id or
                                     child.pred_relation != child.relation):
                    lerrors += 1
                    if child.pred_parent_id != child.parent_id:
                        errors += 1
                        eerrors += 1

                etotal += 1

                if len(errs) > 50: # or True:
                    #eerrs = ((esum(errs)) * (1.0/(float(len(errs)))))
                    eerrs = esum(errs)
                    scalar_loss = eerrs.scalar_value()
                    eerrs.backward()
                    self.trainer.update()
                    errs = []
                    renew_cg()
                    self.Init()

    if len(errs) > 0:
        eerrs = esum(errs) # * (1.0/(float(len(errs))))
        eerrs.scalar_value()
        eerrs.backward()
        self.trainer.update()
        errs = []
        renew_cg()

    # self.trainer.update_epoch() # hanwj 6.20: there is no decay, so just remove it
    # self.trainer.learning_rate /= (1 - rate_decay)
    print "Loss: ", mloss / iSentence
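# ---------------------------------------------------------------------------
# A self-contained sketch of the left/right/shift cost computation used in
# Train above (the arc-hybrid dynamic oracle of Goldberg & Nivre, 2013): each
# cost counts the gold arcs that become unreachable if that transition is
# taken. `Tok`, `hybrid_costs` and the toy configuration are illustration-only
# names, not part of this codebase.
from collections import namedtuple

Tok = namedtuple('Tok', 'id parent_id')

def hybrid_costs(s0, s1, b, alpha, beta):
    #LEFT-ARC (b[0] -> s0) pops s0: loses alternative heads of s0 in s1/beta
    #and dependents of s0 still in the buffer
    left = (len([h for h in s1 + beta if h.id == s0[0].parent_id]) +
            len([d for d in b + beta if d.parent_id == s0[0].id]))
    #RIGHT-ARC (s1 -> s0) pops s0: loses heads of s0 in the buffer and
    #dependents of s0 still in the buffer
    right = (len([h for h in b + beta if h.id == s0[0].parent_id]) +
             len([d for d in b + beta if d.parent_id == s0[0].id]))
    #SHIFT buries b[0]: loses heads of b[0] deeper in the stack and
    #dependents of b[0] anywhere on the stack
    shift = (len([h for h in s1 + alpha if h.id == b[0].parent_id]) +
             len([d for d in s0 + s1 + alpha if d.parent_id == b[0].id]))
    return left, right, shift

#toy configuration: stack = [1], buffer = [2, 3]; gold arcs 2->1, root->2, 2->3
toy_s0 = [Tok(1, 2)]    #top of stack; its head is token 2
toy_b = [Tok(2, 0)]     #front of buffer; its head is the root
toy_beta = [Tok(3, 2)]  #remainder of the buffer
#LEFT-ARC is the only zero-cost transition here
print(hybrid_costs(toy_s0, [], toy_b, [], toy_beta))  #-> (0, 1, 1)
# ---------------------------------------------------------------------------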
def Predict(self, data):
    reached_max_swap = 0
    for iSentence, sentence in data:
        reached_swap_for_i_sentence = False
        max_swap = 2 * len(sentence)
        iSwap = 0
        self.Init()
        conll_sentence = [entry for entry in sentence
                          if isinstance(entry, utils.ConllEntry)]
        conll_sentence = conll_sentence[1:] + [conll_sentence[0]]
        self.getWordEmbeddings(conll_sentence, False)
        stack = ParseForest([])
        buf = ParseForest(conll_sentence)
        hoffset = 1 if self.headFlag else 0

        for root in conll_sentence:
            root.lstms = [root.vec] if self.headFlag else []
            root.lstms += [self.paddingVec for _ in range(self.nnvecs - hoffset)]
            root.relation = root.relation if root.relation in self.rels else 'runk'

        while not (len(buf) == 1 and len(stack) == 0):
            scores = self.__evaluate(stack, buf, False)
            #exclude SWAP from the candidates once the swap budget is spent
            #(see the toy selection example after this method)
            best = max(chain(*(scores if iSwap < max_swap else scores[:3])),
                       key=itemgetter(2))

            if iSwap == max_swap and not reached_swap_for_i_sentence:
                reached_max_swap += 1
                reached_swap_for_i_sentence = True
                print "reached max swap in %d out of %d sentences" % (
                    reached_max_swap, iSentence)

            if best[1] == 2: #SHIFT
                stack.roots.append(buf.roots[0])
                del buf.roots[0]
            elif best[1] == 3: #SWAP
                iSwap += 1
                child = stack.roots.pop()
                buf.roots.insert(1, child)
            elif best[1] == 0: #LEFT-ARC
                child = stack.roots.pop()
                parent = buf.roots[0]
                #record the predicted head and relation
                child.pred_parent_id = parent.id
                child.pred_relation = best[0]
            elif best[1] == 1: #RIGHT-ARC
                child = stack.roots.pop()
                parent = stack.roots[-1]
                child.pred_parent_id = parent.id
                child.pred_relation = best[0]

            #update the representation of the head for attaching transitions
            if best[1] == 0 or best[1] == 1:
                #linear order
                if self.rlMostFlag:
                    parent.lstms[best[1] + hoffset] = child.lstms[best[1] + hoffset]
                #actual children
                if self.rlFlag:
                    parent.lstms[best[1] + hoffset] = child.vec

        dy.renew_cg()
        yield sentence
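# ---------------------------------------------------------------------------
# A minimal sketch of the transition selection used above. `scores` is a list
# of per-transition candidate lists of (relation, op, score, expr) tuples
# (ops: 0 LEFT-ARC, 1 RIGHT-ARC, 2 SHIFT, 3 SWAP); the argmax over their
# concatenation picks the next action, and slicing to scores[:3] excludes
# SWAP once the swap budget is spent. The numbers below are illustration-only.
from itertools import chain
from operator import itemgetter

toy_scores = [
    [('nsubj', 0, 1.2, None)],  #LEFT-ARC candidates
    [('obj', 1, 0.7, None)],    #RIGHT-ARC candidates
    [(None, 2, 2.1, None)],     #SHIFT
    [(None, 3, 2.5, None)],     #SWAP
]
best = max(chain(*toy_scores), key=itemgetter(2))              #SWAP wins
best_no_swap = max(chain(*toy_scores[:3]), key=itemgetter(2))  #SHIFT wins
print("op %d; without swap: op %d" % (best[1], best_no_swap[1]))  #op 3; op 2
# ---------------------------------------------------------------------------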