def _parse_pair(self, tokens, i):
    """
    Parse a single pair starting at C{tokens[i]}.

    A pair is either one identifier, or two identifiers separated by a
    C{':'} token.

    @param tokens: the token sequence being parsed
    @type i: C{int}
    @param i: index of the first token of the pair
    @return: a tuple C{(j, tree)} where C{j} is the index just past the
        last token consumed and C{tree} is a C{Tree} labelled C{'Pair'}
        whose children are the consumed tokens (including the C{':'}
        separator when one is present).
    @raise ValueError: if a token found in an identifier position is a
        member of C{_special_tokens}.
    """
    # NOTE(review): the third argument to _next_token presumably makes a
    # missing token an error -- confirm against _next_token.
    first = self._next_token(tokens, i, True)
    if first in _special_tokens:
        raise ValueError('expected identifier, not ' + first)

    # Single-identifier form: nothing but the identifier is consumed.
    if self._next_token(tokens, i + 1) != ':':
        return (i + 1, Tree('Pair', [tokens[i]]))

    # Two-identifier form: identifier ':' identifier.
    second = self._next_token(tokens, i + 2, True)
    if second in _special_tokens:
        raise ValueError('expected identifier, not ' + second)
    end = i + 3
    return (end, Tree('Pair', tokens[i:end]))
def _parse_singleton(self, tokens, i):
    """
    Parse one singleton starting at C{tokens[i]}.

    A singleton is a parenthesised list C{(...)}, a bracketed list
    C{[...]}, or a bare pair, optionally followed by exactly one of the
    postfix operators C{'*'}, C{'&'} or C{'?'}.

    @param tokens: the token sequence being parsed
    @type i: C{int}
    @param i: index of the first token of the singleton
    @return: a tuple C{(j, tree)} where C{j} is the index just past the
        last token consumed.  C{tree} is whatever C{_parse_list} or
        C{_parse_pair} produced, wrapped in a C{Tree} labelled with the
        postfix operator when one follows.
    @raise ValueError: if a grouping is empty, if its closing delimiter
        is not the next token after its contents, or if the singleton
        starts with some other member of C{_special_tokens}.
    """
    # NOTE(review): the third argument to _next_token presumably makes a
    # missing token an error -- confirm against _next_token.
    token = self._next_token(tokens, i, True)

    # (list label, closing delimiter, empty-group message,
    #  wrong-closer message prefix) for the two grouping forms.
    if token == '(':
        group = ('Cons', ')', 'missing contents of (...)',
                 'missing final parenthesis, instead found ')
    elif token == '[':
        group = ('Or', ']', 'missing contents of [...]',
                 'missing final bracket, instead found ')
    else:
        group = None

    if group is not None:
        (list_type, closer, empty_msg, closer_msg) = group
        (j, result) = self._parse_list(tokens, i + 1, list_type)
        if result is None:
            raise ValueError(empty_msg)
        found = self._next_token(tokens, j, True)
        if found != closer:
            raise ValueError(closer_msg + found)
        j = j + 1
    elif token in _special_tokens:
        raise ValueError('expected identifier, found ' + token)
    else:
        (j, result) = self._parse_pair(tokens, i)

    # At most one postfix operator is consumed.
    suffix = self._next_token(tokens, j)
    if suffix in ('*', '&', '?'):
        return (j + 1, Tree(suffix, [result]))
    return (j, result)
def deptree(self):
    """
    Convert the whole dependency graph into an NLTK L{Tree}, starting
    from the C{root} node.  Dependency labels are left out of the result.

    @return: a L{Tree} labelled with the root node's word, whose
        children are the results of C{_deptree} for each index listed
        in the root node's C{'deps'}.
    """
    root = self.root
    label = root['word']
    children = []
    for dep_index in root['deps']:
        children.append(self._deptree(dep_index))
    return Tree(label, children)
def _parse_list(self, tokens, i, type='Cons'):
    """
    Parse a maximal run of adjacent singletons starting at C{tokens[i]}.

    Parsing stops when C{_next_token} reports no token, or at the first
    token that is a member of C{_non_list_initial_special_tokens}.  A run
    of one singleton is returned unchanged; a longer run is combined
    into right-nested binary trees, so C{a b c} becomes
    C{Tree(type, [a, Tree(type, [b, c])])}.

    @param tokens: the token sequence being parsed
    @type i: C{int}
    @param i: index at which the list starts
    @param type: label given to every combining C{Tree} node.  The name
        shadows the builtin but is kept so existing keyword callers
        continue to work.
    @return: a tuple C{(j, tree)} where C{j} is the index just past the
        last token consumed; C{(i, None)} when no singleton can start
        at C{i}.
    """
    # Collect the singletons iteratively instead of recursing once per
    # element, so a long flat list cannot exhaust the interpreter's
    # recursion limit.
    items = []
    j = i
    while True:
        token = self._next_token(tokens, j)
        if token is None or token in _non_list_initial_special_tokens:
            break
        (j, item) = self._parse_singleton(tokens, j)
        items.append(item)

    if not items:
        return (i, None)

    # Fold from the right to reproduce the right-nested shape.
    tree = items[-1]
    for item in reversed(items[:-1]):
        tree = Tree(type, [item, tree])
    return (j, tree)
def _deptree(self, i):
    """
    Recursively convert the part of the dependency graph below one node
    into an NLTK tree.

    @type i: C{int}
    @param i: index of a node in C{nodelist}
    @return: the node's word when the node has no dependents (a leaf);
        otherwise a L{Tree} labelled with the word, whose children are
        the conversions of each dependent in order.
    """
    entry = self.nodelist[i]
    label = entry['word']
    dependents = entry['deps']

    if len(dependents) != 0:
        subtrees = []
        for child_index in dependents:
            subtrees.append(self._deptree(child_index))
        return Tree(label, subtrees)

    # Leaves are returned as bare words, not as single-node trees.
    return label