def _parse_mention(self, mention: Dict, doc: Doc) -> Mention:
    """Build a ``Mention`` from a raw mention dict, optionally with a head span.

    Args:
        mention: Raw mention record. Its ``"posi"`` entry is decoded by
            ``self._read_posi`` into ``(sid, widx, wlen)``; an optional
            ``"head"`` entry may carry its own ``"posi"``.
        doc: Document whose ``sents[sid]`` is the sentence the mention is
            created on.

    Returns:
        The created mention, or ``None`` when the record has no ``"posi"``
        entry (note: the return annotation does not reflect this).
    """
    # get mention
    main_posi_info = mention.get("posi")
    if main_posi_info is None:
        return None  # no posi info!!
    sid, widx, wlen = self._read_posi(main_posi_info)
    ret = Mention.create(doc.sents[sid], widx, wlen)
    # possible head span?
    # "head" may be missing *or* present with a None value; a plain
    # ``.get("head", {})`` only covers the former and would crash on the latter.
    head_info = mention.get("head") or {}
    head_posi_info = head_info.get("posi")
    if head_posi_info is None:
        return ret
    head_sid, head_widx, head_wlen = self._read_posi(head_posi_info)
    # The head must be in the same sentence and fully inside the main span.
    head_inside_main = (
        head_sid == sid
        and head_widx >= widx
        and head_widx + head_wlen <= widx + wlen
    )
    if head_inside_main:
        # make sure things are correct! otherwise simply discard!!
        ret.set_span(head_widx, head_wlen, hspan=True)
    else:
        zwarn(f"Error in head: {head_posi_info} vs. {main_posi_info}")
    return ret
def _fake_discontinuous_ef(self, all_mentions: List):
    """Create a stand-in frame covering a set of discontinuous mentions.

    The frame's mention spans from the smallest ``widx`` to the largest
    ``wridx`` (used here as an exclusive end) among the mentions that live in
    the same sentence as ``all_mentions[0]``. Word indices inside that span
    that no such mention covers are stored on ``mention.excluded_idxes``.

    Args:
        all_mentions: Non-empty list of mentions; the first one fixes the
            sentence. An empty list raises ``IndexError``.

    Returns:
        The frame produced by ``Frame.create`` for the covering mention.
    """
    # todo(+N): for simplicity, special support for it!!
    # simply make the fake ones but no adding!!
    first = all_mentions[0]
    anchor_sent = first.sent
    # Word indices are only comparable within one sentence, so mentions from
    # other sentences must neither widen the span nor "cover" indices in it.
    same_sent_mentions = [first]
    same_sent_mentions.extend(m for m in all_mentions[1:] if m.sent is anchor_sent)
    start_widx = min(m.widx for m in same_sent_mentions)
    end_widx = max(m.wridx for m in same_sent_mentions)
    fake_ef = Frame.create(
        mention=Mention.create(anchor_sent, start_widx, end_widx - start_widx)
    )
    # --
    excluded_idxes = set(range(start_widx, end_widx))
    for m in same_sent_mentions:
        # difference_update tolerates already-removed indices (repeated mentions)
        excluded_idxes.difference_update(range(m.widx, m.wridx))
    fake_ef.mention.excluded_idxes = excluded_idxes  # todo(+N): ugly fix!!
    return fake_ef