def __init__(self, iden, labelspan, size, fvector, sent):
    """Build a node from a "LABEL [i-j]" description, e.g. "NP [0-3]".

    iden      -- identifier, stored unchanged in self.iden.
    labelspan -- whitespace-separated label and bracketed span; a trailing
                 "*" on the label sets self._spurious and is stripped.
    size      -- when 0 the node is treated as a terminal and its word is
                 sent[span[0]]; otherwise the node carries no word.
    fvector   -- stored unchanged in self.fvector.
    sent      -- indexable sequence of words.
    """
    self.iden = iden

    label, span = labelspan.split()
    # drop the surrounding brackets, then read the "-"-separated integers
    self.span = tuple(int(part) for part in span[1:-1].split("-"))

    self._spurious = label[-1] == "*"
    if self._spurious:
        label = label[:-1]

    # the label "S1" is stored as "TOP"
    normalized = "TOP" if label == "S1" else label
    self.label = symbol(normalized)

    self.edges = []

    if size == 0:
        word = sent[self.span[0]]
    else:
        word = None
    # prepare_stuff reads self.label, so it must run after the label is set;
    # note that it receives the stripped-but-unnormalized label text
    self.prepare_stuff(label, word)

    self.fvector = fvector
    self._root = False
    self._bin_len = None
def __init__(self, label, span, wrd=None, subs=None, is_root=False, sym=True):
    """Create a tree node that is either a leaf (wrd) or internal (subs).

    Exactly one of `wrd` and `subs` must be given; otherwise the assertion
    fails with "bad tree".  When `sym` is false the label (and word) are
    stored as passed instead of going through symbol().
    """
    self.parentlabel = None  # TODO: FIX THIS!

    # in forest.assemble the label is already a symbol, so sym=False skips this
    if sym:
        self.label = symbol(label)
    else:
        self.label = label
    self.span = span

    assert (wrd is None) != (subs is None), "bad tree"

    self.prepare_stuff(label, wrd, sym)

    if not self._terminal:
        self.subs = subs
        # an internal node's sequences are its children's, concatenated in order
        for child in subs:
            self.word_seq.extend(child.word_seq)
            self.tag_seq.extend(child.tag_seq)

    self._root = is_root

    # ---- features ----
    self._bin_len = None

    # for heads feature
    self.allheads = {}
    self.ccheads = {}
    self.twolevels = {}

    # for headtree feature
    self.headspath = {}
def __init__(self, label, span, wrd=None, subs=None, is_root=False, sym=True):
    """Initialise a tree node from a label, a span and either a word or children.

    `wrd` makes the node a terminal; `subs` (a sequence of child nodes) makes
    it an internal node.  Supplying both or neither trips the "bad tree"
    assertion.  `sym=False` stores label/word without calling symbol().
    """
    self.parentlabel = None  # TODO: FIX THIS!

    # callers such as forest.assemble pass sym=False to avoid symbol-ing twice
    self.label = label if not sym else symbol(label)
    self.span = span

    has_word = wrd is not None
    has_subs = subs is not None
    assert has_word != has_subs, "bad tree"

    self.prepare_stuff(label, wrd, sym)

    if not self._terminal:
        self.subs = subs
        # gather the words and tags of all children, left to right
        for kid in subs:
            self.word_seq.extend(kid.word_seq)
            self.tag_seq.extend(kid.tag_seq)

    self._root = is_root

    # ---- features ----
    self._bin_len = None

    # for heads feature
    self.allheads = {}
    self.ccheads = {}
    self.twolevels = {}

    # for headtree feature
    self.headspath = {}
def prepare_stuff(self, label, wrd=None, sym=True):
    """Set up the per-node state shared by terminal and non-terminal nodes.

    label -- tag text; stored as the single entry of tag_seq for terminals.
    wrd   -- the word for a terminal node, or None for a non-terminal.
    sym   -- when true the word is passed through symbol() before storing.

    self.label must already be set: the punctuation / conjunction flags of
    a terminal are computed from it.
    """
    # caches, filled in on first use (the original note likens this to
    # C++ const evaluation)
    self._coordination = None
    self._str = None

    # heads-info: one fresh HeadInfo per head kind
    self.headinfo = {
        heads.SEM: heads.HeadInfo(),
        heads.SYN: heads.HeadInfo(),
    }

    if wrd is None:
        # non-terminal: no word, empty sequences, flags off
        self._terminal = False
        self._punctuation = False
        self._conjunction = False
        self.word_seq = []
        self.tag_seq = []
        return

    # terminal
    self.word = symbol(wrd) if sym else wrd
    self._terminal = True
    self._punctuation = is_punc(self.label)
    self._conjunction = is_conj(self.label)
    self.word_seq = [self.word]
    self.tag_seq = [label]
def __init__(self, iden, labelspan, size, fvector, sent):
    """Build a forest node from a "LABEL [i-j]" description, e.g. "NP [0-3]".

    iden      -- identifier; stored in self.iden and hashed into self._hash.
    labelspan -- whitespace-separated label and bracketed span; a trailing
                 "*" on the label sets self._spurious and is stripped.
    size      -- when 0 the node is treated as a terminal and its word is
                 sent[span[0]]; otherwise the node carries no word.
    fvector   -- stored unchanged in self.fvector.
    sent      -- sequence of word strings; also used to build self.surface.
    """
    self.iden = iden

    label, span = labelspan.split()
    # drop the surrounding brackets, then read the "-"-separated integers
    self.span = tuple(int(part) for part in span[1:-1].split("-"))

    self._spurious = label[-1] == "*"
    if self._spurious:
        label = label[:-1]

    # the label "S1" is stored as "TOP"
    normalized = "TOP" if label == "S1" else label
    self.label = symbol(normalized)

    self.edges = []

    # new features
    self.frags = []
    # self.tfedges = []  (disabled)

    # new feature: subtree str created for bp rules, NP(NN 'ch') -> lhs(bp)
    # (feats)
    self.subtree = ''

    # N.B.: a parse forest node can be terminal.
    # Now in MT forest, nodes are always non-final; hyperedges can be
    # final (terminal).
    if size == 0:
        word = sent[self.span[0]]
    else:
        word = None

    # prepare_stuff lives in tree.py (per the original note); it reads
    # self.label, so it must run after the label is set
    self.prepare_stuff(label, word)

    self.fvector = fvector
    self._root = False
    self._bin_len = None

    # surface string: the words covered by the span, joined with no separator
    start, end = self.span[0], self.span[1]
    self.surface = '%s' % ''.join(sent[start:end])

    self._hash = hash(self.iden)
def prepare_stuff(self, label, wrd=None, sym=True):
    """Initialise caches, head info and the word/tag sequences of a node.

    label -- tag text; becomes the only element of tag_seq for a terminal.
    wrd   -- word of a terminal node; None means the node is a non-terminal.
    sym   -- if true, the word goes through symbol() before being stored.

    Reads self.label (for the punctuation / conjunction tests), so the
    caller has to assign it first.
    """
    # computed lazily and then kept (the original note likens this to
    # C++ const evaluation)
    self._coordination = None
    self._str = None

    # heads-info
    self.headinfo = {
        heads.SEM: heads.HeadInfo(),
        heads.SYN: heads.HeadInfo(),
    }

    if wrd is not None:
        # terminal node: one word, one tag
        token = symbol(wrd) if sym else wrd
        self.word = token
        self._terminal = True
        self._punctuation = is_punc(self.label)
        self._conjunction = is_conj(self.label)
        self.word_seq = [token]
        self.tag_seq = [label]
    else:
        # non-terminal node: sequences start empty, flags off
        self._terminal = False
        self._punctuation = False
        self._conjunction = False
        self.word_seq = []
        self.tag_seq = []