def for_intra(dpack, target):
    """Adapt a datapack and its target to intrasentential decoding.

    The result mirrors the input except for two kinds of relabelling:

    * a (fake root, edu) pairing is labelled 'ROOT' when `edu` is a
      subgrouping head, i.e. no pairing whose two EDUs share its
      subgrouping gives it a label other than UNRELATED in `target`;
    * a pairing whose source is not the fake root and whose two EDUs
      belong to different subgroupings is reset to UNRELATED if it
      carried any other label in `target`.

    This should be done before either `for_labelling` or
    `for_attachment`.

    Parameters
    ----------
    dpack : DataPack
        Datapack to adapt; a new DataPack is built from it.
    target : array(int)
        One label per pairing of `dpack`; it is copied before being
        relabelled.

    Returns
    -------
    dpack : DataPack
        New datapack sharing the EDUs, pairings, data, labels, vocab and
        graph of the input, with a relabelled copy of its target.
    target : array(int)
        Relabelled copy of `target`.
    """
    unrelated = dpack.label_number(UNRELATED)
    pairings = dpack.pairings

    # subgrouping of every EDU, keyed by EDU id; a pairing is "intra"
    # when both of its EDUs map to the same subgrouping
    subgroup_of = {}
    for edu in dpack.edus:
        subgroup_of[edu.id] = edu.subgrouping

    # first pass: per subgrouping, ids of the EDUs reached by a labelled
    # intra link (these cannot be heads)
    has_intra_parent = defaultdict(set)
    for idx, (src, tgt) in enumerate(pairings):
        if subgroup_of[src.id] != subgroup_of[tgt.id]:
            continue
        if target[idx] == unrelated:
            continue
        has_intra_parent[subgroup_of[tgt.id]].add(tgt.id)

    # second pass: indices of the (fake root, head) pairings, and of the
    # labelled links that cross subgroupings (to be removed)
    head_idxs = []
    cross_idxs = []
    for idx, (src, tgt) in enumerate(pairings):
        if src.id == FAKE_ROOT_ID:
            if tgt.id not in has_intra_parent[subgroup_of[tgt.id]]:
                head_idxs.append(idx)
        elif (subgroup_of[src.id] != subgroup_of[tgt.id]
              and target[idx] != unrelated):
            cross_idxs.append(idx)

    # relabel a copy of the datapack's own target
    pack_target = np.copy(dpack.target)
    pack_target[head_idxs] = dpack.label_number('ROOT')
    pack_target[cross_idxs] = unrelated

    # WIP: ctarget is only shallow-copied for now
    # FIXME replace each ctgt with the list of intra-sentential
    # RST (sub)trees
    pack_ctarget = dict(dpack.ctarget.items())

    dpack = DataPack(edus=dpack.edus,
                     pairings=dpack.pairings,
                     data=dpack.data,
                     target=pack_target,
                     ctarget=pack_ctarget,
                     labels=dpack.labels,
                     vocab=dpack.vocab,
                     graph=dpack.graph)

    # same relabelling on a copy of the caller-supplied target
    target = np.copy(target)
    target[head_idxs] = dpack.label_number('ROOT')
    target[cross_idxs] = unrelated
    return dpack, target
def for_intra(dpack, target):
    """Adapt a datapack and its target to intrasentential decoding.

    An intrasentential datapack is almost identical to its original:
    the only change is that each (fake root, edu) pairing is labelled
    'ROOT' when `edu` is a subgrouping head, i.e. no pairing whose two
    EDUs share its subgrouping gives it a label other than UNRELATED in
    `target`.

    This should be done before either `for_labelling` or
    `for_attachment`.

    Parameters
    ----------
    dpack : DataPack
        Datapack to adapt; a new DataPack is built from it.
    target : array(int)
        One label per pairing of `dpack`; it is copied before being
        relabelled.

    Returns
    -------
    dpack : DataPack
        New datapack sharing the EDUs, pairings, data, labels, vocab and
        graph of the input, with a relabelled copy of its target.
    target : array(int)
        Relabelled copy of `target`.
    """
    unrelated = dpack.label_number(UNRELATED)

    # per subgrouping, ids of the EDUs that receive a labelled link from
    # an EDU of the same subgrouping (these cannot be heads)
    non_heads = defaultdict(set)
    for idx, (src, tgt) in enumerate(dpack.pairings):
        group = tgt.subgrouping
        if src.subgrouping != group:
            continue
        if target[idx] == unrelated:
            continue
        non_heads[group].add(tgt.id)

    # indices of the (fake root, edu) pairings whose edu has no incoming
    # intra link
    head_idxs = [
        idx for idx, (src, tgt) in enumerate(dpack.pairings)
        if (src.id == FAKE_ROOT_ID
            and tgt.id not in non_heads[tgt.subgrouping])
    ]

    # relabel a copy of the datapack's own target
    pack_target = np.copy(dpack.target)
    pack_target[head_idxs] = dpack.label_number('ROOT')
    dpack = DataPack(edus=dpack.edus,
                     pairings=dpack.pairings,
                     data=dpack.data,
                     target=pack_target,
                     labels=dpack.labels,
                     vocab=dpack.vocab,
                     graph=dpack.graph)

    # same relabelling on a copy of the caller-supplied target
    target = np.copy(target)
    target[head_idxs] = dpack.label_number('ROOT')
    return dpack, target
def for_intra(dpack, target):
    """Restrict a datapack and its target to intrasentential links.

    Two relabellings are applied, both to a copy of `dpack.target` and
    to a copy of `target`:

    * each (fake root, edu) pairing becomes 'ROOT' when `edu` is a
      subgrouping head, i.e. no pairing whose two EDUs share its
      subgrouping gives it a label other than UNRELATED in `target`;
    * each pairing that does not start at the fake root, joins EDUs of
      different subgroupings and is not UNRELATED in `target` becomes
      UNRELATED.

    This should be done before either `for_labelling` or
    `for_attachment`.

    Parameters
    ----------
    dpack : DataPack
        Datapack to adapt; a new DataPack is built from it.
    target : array(int)
        One label per pairing of `dpack`.

    Returns
    -------
    dpack : DataPack
        New datapack with the relabelled target and a shallow copy of
        the original ctarget; all other fields are passed through.
    target : array(int)
        Relabelled copy of `target`.
    """
    # EDU id -> subgrouping; "intra" pairings join EDUs with equal values
    group_by_id = {edu.id: edu.subgrouping for edu in dpack.edus}

    unrelated = dpack.label_number(UNRELATED)

    # collect, per subgrouping, the EDUs with an incoming intra relation:
    # those are ruled out as (intra) roots
    intra_targets = defaultdict(set)
    for pos, (parent, child) in enumerate(dpack.pairings):
        is_intra = group_by_id[parent.id] == group_by_id[child.id]
        if is_intra and target[pos] != unrelated:
            intra_targets[group_by_id[child.id]].add(child.id)

    # (fake root, edu) pairings where edu has no incoming intra edge
    heads = []
    for pos, (parent, child) in enumerate(dpack.pairings):
        if parent.id != FAKE_ROOT_ID:
            continue
        if child.id in intra_targets[group_by_id[child.id]]:
            continue
        heads.append(pos)

    # original inter-sentential links, selected for removal
    inter = []
    for pos, (parent, child) in enumerate(dpack.pairings):
        if parent.id == FAKE_ROOT_ID:
            continue
        if group_by_id[parent.id] == group_by_id[child.id]:
            continue
        if target[pos] != unrelated:
            inter.append(pos)

    # update datapack and target accordingly
    updated = np.copy(dpack.target)
    updated[heads] = dpack.label_number('ROOT')
    updated[inter] = unrelated

    # WIP: ctarget is carried over as a shallow copy
    # FIXME replace each ctgt with the list of intra-sentential
    # RST (sub)trees
    ctarget_copy = {}
    for grp_name, ctgt in dpack.ctarget.items():
        ctarget_copy[grp_name] = ctgt

    dpack = DataPack(edus=dpack.edus,
                     pairings=dpack.pairings,
                     data=dpack.data,
                     target=updated,
                     ctarget=ctarget_copy,
                     labels=dpack.labels,
                     vocab=dpack.vocab,
                     graph=dpack.graph)

    target = np.copy(target)
    target[heads] = dpack.label_number('ROOT')
    target[inter] = unrelated
    return dpack, target