def __init__(self, conf: ExtenderConf, **kwargs):
    """Set up the span accessors, the encoder and the span-expander node.

    Args:
        conf: extender configuration; after the parent initializer runs, the
            instance's own ``self.conf`` is used for every lookup below.
        **kwargs: forwarded untouched to the parent initializer.
    """
    super().__init__(conf, **kwargs)
    conf: ExtenderConf = self.conf
    # --
    # accessors for the span flavour selected by ``conf.ext_span_mode``
    span_mode = conf.ext_span_mode
    self.ext_span_getter = Mention.create_span_getter(span_mode)
    self.ext_span_setter = Mention.create_span_setter(span_mode)
    # encoder first, then the expander (construction order is kept as-is)
    self.eenc = PlainEncoder(conf.eenc_conf, input_dim=conf.isize)
    # -1 is passed as ``psize`` whenever ``ext_use_finput`` is off
    expander_psize = conf.psize if conf.ext_use_finput else -1
    self.enode = SpanExpanderNode(conf.econf, isize=conf.isize, psize=expander_psize)
def __init__(self, conf: MySRLConf, vocab_evt: SimpleVocab, vocab_arg: SimpleVocab):
    """Store the configuration and vocabularies and build the span accessors.

    Args:
        conf: SRL configuration; ``evt_span_mode`` and ``arg_span_mode`` are
            read from it.
        vocab_evt: vocabulary kept as ``self.vocab_evt``.
        vocab_arg: vocabulary kept as ``self.vocab_arg``.
    """
    self.conf = conf
    self.vocab_evt, self.vocab_arg = vocab_evt, vocab_arg
    # --
    # one getter/setter pair per span mode: events first, then arguments
    evt_mode = conf.evt_span_mode
    self.evt_span_getter = Mention.create_span_getter(evt_mode)
    self.evt_span_setter = Mention.create_span_setter(evt_mode)
    arg_mode = conf.arg_span_mode
    self.arg_span_getter = Mention.create_span_getter(arg_mode)
    self.arg_span_setter = Mention.create_span_setter(arg_mode)
def __init__(self, conf: BaseExtractorConf, vocab: SimpleVocab):
    """Store conf/vocab and pick the get/clear/new handlers for ``conf.ftag``.

    When ``conf.ftag == "arg"`` the argument handlers are installed; any
    other value installs the frame handlers, which are keyed by that tag.

    Args:
        conf: extractor configuration (borrowed, not copied).
        vocab: vocabulary kept as ``self.vocab``.
    """
    self.conf = conf  # borrow conf
    span_mode = conf.core_span_mode
    self.core_span_getter = Mention.create_span_getter(span_mode)
    self.core_span_setter = Mention.create_span_setter(span_mode)
    self.vocab = vocab
    # --

    def _drop_args(inst):
        # first delete all args if existing
        return inst.clear_args()

    def _drop_frames(inst):
        # first delete all frames if existing; the tag is read at call time
        return inst.delete_frames(conf.ftag)

    handles_args = conf.ftag == "arg"
    self._get_f = self._get_args if handles_args else self._get_frames
    self._clear_f = _drop_args if handles_args else _drop_frames
    self._new_f = self._new_arg if handles_args else self._new_frame
def __init__(self, conf: FrameEvalConf):
    """Resolve the evaluator's callables from the configuration.

    ``conf.frame_getter`` is looked up in ``_FRAME_GETTERS`` first; when the
    table has no (non-None) entry for it, the string itself is evaluated as a
    Python expression. ``conf.fpair_filter`` is always evaluated.

    NOTE(review): both ``eval`` calls run configuration text as code in this
    method's scope, so the configuration must come from a trusted source.
    No extra locals are introduced here on purpose, to leave the namespace
    seen by ``eval`` unchanged.

    Args:
        conf: evaluation configuration; ``self.conf`` (as set by the parent
            initializer) is what is actually read.
    """
    super().__init__(conf)
    conf: FrameEvalConf = self.conf
    # --
    # frame getter: named shortcut if one is registered, otherwise eval
    self.frame_getter = _FRAME_GETTERS.get(conf.frame_getter)
    if self.frame_getter is None:
        self.frame_getter = eval(conf.frame_getter)
    # span getters for frames and for arguments
    self.span_getter_frame = Mention.create_span_getter(conf.span_mode_frame)
    self.span_getter_arg = Mention.create_span_getter(conf.span_mode_arg)
    # special: frame-pair filter given as an expression string
    self.fpair_filter_f = eval(conf.fpair_filter)
    # --
    # start from an all-zero result
    self.current_result = FrameEvalResult.zero(conf)
def __init__(self, conf: PostProcessConf, **kwargs):
    """Prepare the optional blacklist rule, item list/delete helpers and span getters.

    Args:
        conf: post-processing configuration; ``self.conf`` (as set by the
            parent initializer) is what is actually read.
        **kwargs: forwarded untouched to the parent initializer.
    """
    super().__init__(conf, **kwargs)
    conf: PostProcessConf = self.conf
    # --
    # the rule class is imported only when it is switched on
    brule = None
    if conf.pp_brule_semafor:
        from ..rule_target import BlacklistRule_semafor
        brule = BlacklistRule_semafor()
    self.s_brule = brule

    # listing / deleting helpers; the frame variants read the tag at call time
    def _list_args(x):
        return x.args

    def _del_arg(f, x):
        return x.delete_self()

    def _list_frames(x):
        return x.get_frames(conf._ftag)

    def _del_frame(s, x):
        return s.delete_frame(x, conf._ftag)

    if conf._ftag == "arg":
        self._list_f, self._del_f = _list_args, _del_arg
    else:
        self._list_f, self._del_f = _list_frames, _del_frame
    # --
    self.span_getter = Mention.create_span_getter(conf._span_mode)
    # used for check overlapping!
    self.full_span_getter = Mention.create_span_getter("span")
def score_mention_pair(m1: Mention, m2: Mention, span_getter: Callable) -> float:
    """Score how much two mentions overlap, as a Jaccard index over tokens.

    Mentions whose sentences have different ``sid`` score 0. If either
    mention carries an ``excluded_idxes`` attribute (faked discontinuous
    mentions), the full span from ``get_span()`` is used and the excluded
    token indices are removed before comparing. Otherwise ``span_getter``
    supplies the ``(start, length)`` pair for each mention.

    Args:
        m1: first mention.
        m2: second mention.
        span_getter: callable returning ``(start, length)`` for a mention;
            not consulted on the ``excluded_idxes`` path.

    Returns:
        ``|intersection| / |union|`` of the two token sets, or ``0.`` when
        the sentences differ or when both token sets are empty (the original
        code raised ``ZeroDivisionError`` in that last case).
    """
    if m1.sent.sid != m2.sent.sid:  # first need sent match
        return 0.
    if hasattr(m1, "excluded_idxes") or hasattr(m2, "excluded_idxes"):
        # todo(+N): ugly fix!!
        # todo(+W): currently simply get the whole span for these faked ones!
        start1, len1 = m1.get_span()
        start2, len2 = m2.get_span()
        tokens1 = set(range(start1, start1 + len1)).difference(getattr(m1, "excluded_idxes", ()))
        tokens2 = set(range(start2, start2 + len2)).difference(getattr(m2, "excluded_idxes", ()))
        union_size = len(tokens1 | tokens2)
        if union_size == 0:  # nothing left on either side: no overlap to measure
            return 0.
        return len(tokens1 & tokens2) / union_size
    # [start, start+len)
    start1, len1 = span_getter(m1)
    start2, len2 = span_getter(m2)
    overlap = min(start1 + len1, start2 + len2) - max(start1, start2)
    overlap = max(0, overlap)  # overlapped tokens
    union_size = len1 + len2 - overlap
    if union_size == 0:  # both spans are empty
        return 0.
    return overlap / union_size  # using Jaccard Index
def _parse_mention(self, mention: Dict, doc: Doc) -> Mention:
    """Build a ``Mention`` from its dict form, with an optional head span.

    Args:
        mention: dict with a ``"posi"`` entry and, optionally, a ``"head"``
            dict that has its own ``"posi"`` entry.
        doc: document whose ``sents`` list is indexed by the parsed sentence id.

    Returns:
        The created mention, or ``None`` when ``mention`` has no ``"posi"``.
        The head span is set only if it lies in the same sentence and inside
        the main span; otherwise a warning is emitted and the head is dropped.
    """
    # get mention
    main_posi_info = mention.get("posi")
    if main_posi_info is None:
        return None  # no posi info!!
    sid, widx, wlen = self._read_posi(main_posi_info)
    parsed = Mention.create(doc.sents[sid], widx, wlen)
    # possible head span?
    head_posi_info = mention.get("head", {}).get("posi")
    if head_posi_info is None:
        return parsed
    h_sid, h_widx, h_wlen = self._read_posi(head_posi_info)
    # make sure things are correct! otherwise simply discard!!
    head_is_inside = h_sid == sid and (h_widx >= widx and h_widx + h_wlen <= widx + wlen)
    if head_is_inside:
        parsed.set_span(h_widx, h_wlen, hspan=True)
    else:
        zwarn(f"Error in head: {head_posi_info} vs. {main_posi_info}")
    return parsed
def _fake_discontinuous_ef(self, all_mentions: List):
    """Fake one frame that stands in for a discontinuous set of mentions.

    The frame's mention spans from the smallest ``widx`` to the largest
    ``wridx`` among the mentions that share the first mention's sentence
    (``wridx`` is used as an exclusive right bound). Token indices inside
    that envelope that no piece covers are stored on the mention as
    ``excluded_idxes``. The frame is only created and returned here; this
    method does not attach it anywhere else.

    Pieces located in other sentences are ignored for BOTH steps. The
    previous version skipped them when computing the envelope but still let
    them remove indices from the gap set, using token offsets that belong to
    a different sentence.

    Args:
        all_mentions: non-empty list of mentions; the first one decides the
            sentence.

    Returns:
        The faked frame, with ``mention.excluded_idxes`` set to the gap indices.
    """
    # todo(+N): for simplicity, special support for it!!
    anchor_sent = all_mentions[0].sent
    pieces = [m for m in all_mentions if m.sent is anchor_sent]
    # envelope over the same-sentence pieces
    start_widx = min(m.widx for m in pieces)
    end_widx = max(m.wridx for m in pieces)
    fake_ef = Frame.create(mention=Mention.create(anchor_sent, start_widx, end_widx - start_widx))
    # --
    # gaps = envelope minus every covered token (repeated mentions are harmless)
    excluded_idxes = set(range(start_widx, end_widx))
    for m in pieces:
        excluded_idxes.difference_update(range(m.widx, m.wridx))
    fake_ef.mention.excluded_idxes = excluded_idxes  # todo(+N): ugly fix!!
    return fake_ef