def check_puncs(forest, pos2):
    '''Reconcile a copy of `forest` with the tag sequence `pos2`.

    Works on `forest.copy()`; the object bound to `forest` is never
    reassigned or edited directly here (NOTE(review): whether nodes are
    shared with the original depends on how deep `copy()` is -- confirm).

    Steps:
      1. build idx_mapping, which maps every word boundary 0..len(pos2)
         to its boundary index once the words whose `pos2` tag is
         punctuation (per is_punc) are skipped;
      2. mark as deleted every terminal / sp_terminal node in cell
         (i, i+1) whose label disagrees with pos2[i] about being
         punctuation;
      3. drop deleted terminals from the node order, rebuild the
         iden -> node table and rehash;
      4. for each non-terminal node, drop edges that are unary cycles,
         that use a deleted terminal, or that would become a unary cycle
         after the span mapping (CAUTION: delete redundant layers).

    Every deletion is reported on the module-level `logs` stream.

    Returns a pair (mapping function, modified forest copy), where the
    mapping function looks a boundary index up in idx_mapping.

    Raises AssertionError if the forest and `pos2` differ in length.
    '''
    newforest = forest.copy()

    # Report the two lengths themselves; the message must only use names
    # that exist in this scope, otherwise a mismatch surfaces as NameError.
    assert len(newforest) == len(pos2), \
        "different sentence lengths!\nforest: %d\npos2: %d" \
        % (len(newforest), len(pos2))

    idx_mapping = {}
    j = 0
    # Starts as True so that boundary 0 always maps to 0.
    last_is_punc = True
    for i, a in enumerate(pos2):
        # The boundary index only advances past a non-punctuation word.
        if not last_is_punc:
            j += 1
        idx_mapping[i] = j

        # delete the non-consistent tags of this word in the newforest
        for node in newforest.cells[(i, i + 1)]:
            if node.is_terminal() or node.sp_terminal():
                # deleted iff exactly one of (node label, given tag) is punctuation
                node.deleted = is_punc(node.label) ^ is_punc(a)
                if node.deleted:
                    # same text as "print >> logs, node, 'deleted'", but
                    # valid on both Python 2 and Python 3
                    logs.write("%s deleted\n" % (node,))

        last_is_punc = is_punc(a)

    # Closing boundary. len(pos2) equals i + 1 after a non-empty loop and
    # is also well defined when pos2 is empty.
    if not last_is_punc:
        j += 1
    idx_mapping[len(pos2)] = j

    # TODO: CLEAN UP THIS PART!
    # Keep every non-terminal, and the terminals that were not deleted.
    newforest.nodeorder = [node for node in newforest
                           if not node.is_terminal() or not node.deleted]
    newforest.nodes = {}
    for node in newforest:
        newforest.nodes[node.iden] = node

    newforest.rehash()

    for node in newforest:
        if not node.is_terminal():
            mapped_span = node.mapped_span(idx_mapping)
            newedges = []
            for edge in node.edges:
                if edge.unary_cycle():
                    logs.write("%s deleted (cycle)\n" % (edge,))
                else:
                    for sub in edge.subs:
                        if sub.is_terminal() and sub.deleted:
                            logs.write("%s deleted (punc)\n" % (edge,))
                            break
                        if not node.is_root() and sub.label == node.label \
                           and sub.mapped_span(idx_mapping) == mapped_span:
                            ## make sure no induced unary cycle
                            logs.write("%s deleted (induced cycle)\n" % (edge,))
                            break
                    else:
                        # no sub triggered a break: the edge survives
                        newedges.append(edge)
            node.edges = newedges

    return lambda x: idx_mapping[x], newforest
def prepare_stuff(self, label, wrd=None, sym=True):
    '''Initialise cached fields, head info and the terminal-related flags.

    label -- tag stored in tag_seq when this node is a terminal
    wrd   -- the word; None means the node is not a terminal
    sym   -- when true, the word is passed through symbol() before storing

    NOTE(review): the punctuation / conjunction flags are computed from
    self.label, not from the `label` argument, so self.label must already
    be set when this is called -- confirm against the caller.
    '''
    ## caches, filled in on first use (same as C++'s const)
    self._coordination = None
    self._str = None

    ## heads-info: one fresh HeadInfo per head kind
    self.headinfo = {
        heads.SEM: heads.HeadInfo(),
        heads.SYN: heads.HeadInfo(),
    }

    if wrd is None:
        ## non-terminal: no word attribute, empty sequences
        self._terminal = False
        self._punctuation = False
        self._conjunction = False
        self.word_seq = []
        self.tag_seq = []
        return

    ## terminal
    if sym:
        self.word = symbol(wrd)
    else:
        self.word = wrd
    self._terminal = True
    self._punctuation = is_punc(self.label)
    self._conjunction = is_conj(self.label)
    self.word_seq = [self.word]
    self.tag_seq = [label]
def prepare_stuff(self, label, wrd=None, sym=True):
    '''Set up the lazily evaluated caches, the head info table and the
    terminal / punctuation / conjunction flags of this node.

    A node is treated as a terminal exactly when `wrd` is not None. The
    stored word is symbol(wrd) if `sym` is true, else `wrd` unchanged.

    NOTE(review): is_punc / is_conj are applied to self.label rather than
    to the `label` parameter; presumably the caller assigns self.label
    first -- verify.
    '''
    self._coordination = None   ## computed on first request (same as C++'s const)
    self._str = None

    ## heads-info
    sem_info = heads.HeadInfo()
    syn_info = heads.HeadInfo()
    self.headinfo = {heads.SEM: sem_info, heads.SYN: syn_info}

    has_word = wrd is not None
    if not has_word:
        ## internal node: every flag off, nothing in the sequences
        self._terminal = self._punctuation = self._conjunction = False
        self.word_seq = []
        self.tag_seq = []
    else:
        stored = wrd
        if sym:
            stored = symbol(wrd)
        self.word = stored
        self._terminal = True
        self._punctuation = is_punc(self.label)
        self._conjunction = is_conj(self.label)
        self.word_seq = [self.word]
        self.tag_seq = [label]
def is_punctuation(self):
    '''Return is_punc() applied to this object's label.'''
    own_label = self.label
    return is_punc(own_label)