def loadConll(inFile): sents = [] trees = [] with open('data/' + inFile, 'rb') as fin: sentenceTokens = [] tree = DependencyTree() for line in fin: line = line.strip() line = line.split('\t') if len(line) < 10: if len(sentenceTokens) > 0: trees.append(tree) sents.append(sentenceTokens) tree = DependencyTree() sentenceTokens = [] else: word = line[1] pos = line[4] head = int(line[6]) depType = line[7] token = {} token['word'] = word token['POS'] = pos token['head'] = head token['depType'] = depType sentenceTokens.append(token) tree.add(head, depType) return sents, trees
def loadConll(inFile): sents = [] trees = [] counter = 0 with open('data/' + inFile, 'r') as fin: sentenceTokens = [] tree = DependencyTree() for line in fin: line = line.strip() line = line.split('\t') if len(line) < 10: if len(sentenceTokens) > 0: trees.append(tree) sents.append(sentenceTokens) tree = DependencyTree() sentenceTokens = [] else: # Added try block to handle exception encountered in python 3 try: word = line[1] pos = line[4] head = int(line[6]) depType = line[7] token = {} token['word'] = word token['POS'] = pos token['head'] = head token['depType'] = depType sentenceTokens.append(token) except: continue tree.add(head, depType) return sents, trees
def __init__(self, sentence): self.stack = [] self.buffer = [] self.tree = DependencyTree() self.sentence = sentence
class Configuration: def __init__(self, sentence): self.stack = [] self.buffer = [] self.tree = DependencyTree() self.sentence = sentence def shift(self): k = self.getBuffer(0) if k == Config.NONEXIST: return False self.buffer.pop(0) self.stack.append(k) return True def removeSecondTopStack(self): nStack = self.getStackSize() if nStack < 2: return False self.stack.pop(-2) return True def removeTopStack(self): nStack = self.getStackSize() if nStack <= 1: return False self.stack.pop() return True def getStackSize(self): return len(self.stack) def getBufferSize(self): return len(self.buffer) def getSentenceSize(self): return len(self.sentence) def getHead(self, k): return self.tree.getHead(k) def getLabel(self, k): return self.tree.getLabel(k) def getStack(self, k): """ Get the token index of the kth word on the stack. If stack doesn't have an element at this index, return Config.NONEXIST """ nStack = self.getStackSize() if k >= 0 and k < nStack: return self.stack[nStack - 1 - k] else: return Config.NONEXIST def getBuffer(self, k): """ Get the token index of the kth word on the buffer. If buffer doesn't have an element at this index, return Config.NONEXIST """ if k >= 0 and k < self.getBufferSize(): return self.buffer[k] else: return Config.NONEXIST def getWord(self, k): """ Get the word at index k """ if k == 0: return Config.ROOT else: k -= 1 if k < 0 or k >= len(self.sentence): return Config.NULL else: return self.sentence[k]['word'] def getPOS(self, k): """ Get the POS at index k """ if k == 0: return Config.ROOT else: k -= 1 if k < 0 or k >= len(self.sentence): return Config.NULL else: return self.sentence[k]['POS'] def addArc(self, h, t, l): """ Add an arc with the label l from the head node h to the dependent node t. """ self.tree.set(t, h, l) def getLeftChild(self, k, cnt): """ Get cnt-th leftmost child of k. (i.e., if cnt = 1, the leftmost child of k will be returned, if cnt = 2, the 2nd leftmost child of k will be returned.) """ if k < 0 or k > self.tree.n: return Config.NONEXIST c = 0 for i in range(1, k): if self.tree.getHead(i) == k: c += 1 if c == cnt: return i return Config.NONEXIST def getRightChild(self, k, cnt): """ Get cnt-th rightmost child of k. (i.e., if cnt = 1, the rightmost child of k will be returned, if cnt = 2, the 2nd rightmost child of k will be returned.) """ if k < 0 or k > self.tree.n: return Config.NONEXIST c = 0 for i in range(self.tree.n, k, -1): if self.tree.getHead(i) == k: c += 1 if c == cnt: return i return Config.NONEXIST def hasOtherChild(self, k, goldTree): for i in range(1, self.tree.n + 1): if goldTree.getHead(i) == k and self.tree.getHead(i) != k: return True return False def getStr(self): """ Returns a string that concatenates all elements on the stack and buffer, and head / label """ s = "[S]" for i in range(self.getStackSize()): if i > 0: s += "," s += self.stack[i] s += "[B]" for i in range(self.getBufferSize()): if i > 0: s += "," s += self.buffer[i] s += "[H]" for i in range(1, self.tree.n + 1): if i > 1: s += "," s += self.getHead(i) + "(" + self.getLabel(i) + ")" return s