Exemple #1
0
def for_intra(dpack, target):
    """Derive the intrasentential version of a datapack and its target.

    The result mirrors the input, except for two label changes:

    * each (fake root, edu) pairing is labelled 'ROOT' when `edu` is a
      subgrouping head, i.e. no pairing whose source lies in the same
      subgrouping points to it with a label other than unrelated;
    * each pairing between EDUs of different subgroupings (fake root
      excluded) that is not unrelated in `target` is relabelled as
      unrelated.

    This should be done before either `for_labelling` or `for_attachment`

    Parameters
    ----------
    dpack : DataPack
        Original datapack.

    target : array(int)
        Labels, indexed by position in `dpack.pairings`.

    Returns
    -------
    dpack : DataPack
        New datapack carrying the updated copy of `dpack.target`.

    target : array(int)
        Updated copy of `target`.

    """
    # subgrouping of each EDU, keyed by EDU id; a pairing is "intra"
    # when both of its members map to the same subgrouping
    subgroup_of = {edu.id: edu.subgrouping for edu in dpack.edus}
    unrelated = dpack.label_number(UNRELATED)

    # first pass: remember every EDU that has an incoming intra
    # relation, as such an EDU cannot be an (intra) root
    attached = set()
    for idx, (src, tgt) in enumerate(dpack.pairings):
        src_grp = subgroup_of[src.id]
        tgt_grp = subgroup_of[tgt.id]
        if src_grp == tgt_grp and target[idx] != unrelated:
            attached.add((tgt_grp, tgt.id))

    # second pass: collect the (fake root, head) pairings, and the
    # original inter-sentential links that have to be removed
    head_idxs = []
    inter_idxs = []
    for idx, (src, tgt) in enumerate(dpack.pairings):
        if src.id == FAKE_ROOT_ID:
            if (subgroup_of[tgt.id], tgt.id) not in attached:
                head_idxs.append(idx)
        elif (subgroup_of[src.id] != subgroup_of[tgt.id]
              and target[idx] != unrelated):
            inter_idxs.append(idx)

    # the rebuilt datapack reuses the same labels, so the number of the
    # 'ROOT' label is looked up once and shared by both updates
    root = dpack.label_number('ROOT')

    # relabelled copy of the target stored in the datapack
    pack_target = np.copy(dpack.target)
    pack_target[head_idxs] = root
    pack_target[inter_idxs] = unrelated

    # WIP ctarget: currently a plain shallow copy
    # FIXME replace each ctgt with the list of intra-sentential
    # RST (sub)trees
    pack_ctarget = dict(dpack.ctarget.items())
    # end WIP ctarget

    intra_dpack = DataPack(edus=dpack.edus,
                           pairings=dpack.pairings,
                           data=dpack.data,
                           target=pack_target,
                           ctarget=pack_ctarget,
                           labels=dpack.labels,
                           vocab=dpack.vocab,
                           graph=dpack.graph)

    # relabelled copy of the target passed by the caller
    intra_target = np.copy(target)
    intra_target[head_idxs] = root
    intra_target[inter_idxs] = unrelated
    return intra_dpack, intra_target
Exemple #2
0
def for_intra(dpack, target):
    """Derive the intrasentential version of a datapack and its target.

    An intrasentential datapack is almost identical to its original,
    except that each (fake root, edu) pairing is labelled 'ROOT' when
    `edu` is a subgrouping head, i.e. when no pairing whose source lies
    in the same subgrouping points to it with a label other than
    unrelated.

    This should be done before either `for_labelling` or `for_attachment`

    Parameters
    ----------
    dpack: DataPack
        Original datapack.
    target: array(int)
        Labels, indexed by position in `dpack.pairings`.

    Returns
    -------
    dpack: DataPack
        New datapack carrying the updated copy of `dpack.target`.
    target: array(int)
        Updated copy of `target`.
    """
    unrelated = dpack.label_number(UNRELATED)

    # remember, as (subgrouping, edu id), every EDU that has an incoming
    # intra relation: such an EDU cannot be the head of its subgrouping
    attached = set()
    for idx, (src, tgt) in enumerate(dpack.pairings):
        tgt_grp = tgt.subgrouping
        if src.subgrouping == tgt_grp and target[idx] != unrelated:
            attached.add((tgt_grp, tgt.id))

    # positions of the (fake root, edu) pairings whose edu is a head
    head_idxs = [
        idx for idx, (src, tgt) in enumerate(dpack.pairings)
        if src.id == FAKE_ROOT_ID
        and (tgt.subgrouping, tgt.id) not in attached
    ]

    # the rebuilt datapack reuses the same labels, so the number of the
    # 'ROOT' label is looked up once and shared by both updates
    root = dpack.label_number('ROOT')

    pack_target = np.copy(dpack.target)
    pack_target[head_idxs] = root
    intra_dpack = DataPack(edus=dpack.edus,
                           pairings=dpack.pairings,
                           data=dpack.data,
                           target=pack_target,
                           labels=dpack.labels,
                           vocab=dpack.vocab,
                           graph=dpack.graph)

    intra_target = np.copy(target)
    intra_target[head_idxs] = root
    return intra_dpack, intra_target
Exemple #3
0
def for_intra(dpack, target):
    """Derive the intrasentential version of a datapack and its target.

    The result mirrors the input, except for two label changes:

    * each (fake root, edu) pairing is labelled 'ROOT' when `edu` is a
      subgrouping head, i.e. no pairing whose source lies in the same
      subgrouping points to it with a label other than unrelated;
    * each pairing between EDUs of different subgroupings (fake root
      excluded) that is not unrelated in `target` is relabelled as
      unrelated.

    This should be done before either `for_labelling` or `for_attachment`

    Parameters
    ----------
    dpack : DataPack
        Original datapack.

    target : array(int)
        Labels, indexed by position in `dpack.pairings`.

    Returns
    -------
    dpack : DataPack
        New datapack carrying the updated copy of `dpack.target`.

    target : array(int)
        Updated copy of `target`.

    """
    # subgrouping of each EDU, keyed by EDU id; a pairing is "intra"
    # when both of its members map to the same subgrouping
    grp_of = {edu.id: edu.subgrouping for edu in dpack.edus}
    unrelated = dpack.label_number(UNRELATED)

    # (subgrouping, edu id) of every EDU that has an incoming intra
    # relation, hence cannot be an (intra) root
    non_roots = {
        (grp_of[tgt.id], tgt.id)
        for pos, (src, tgt) in enumerate(dpack.pairings)
        if grp_of[src.id] == grp_of[tgt.id] and target[pos] != unrelated
    }

    # (fake root, edu) pairings where edu has no incoming intra edge
    heads = []
    for pos, (src, tgt) in enumerate(dpack.pairings):
        if src.id != FAKE_ROOT_ID:
            continue
        if (grp_of[tgt.id], tgt.id) not in non_roots:
            heads.append(pos)

    # original inter-sentential links, to be removed
    crossing = []
    for pos, (src, tgt) in enumerate(dpack.pairings):
        if src.id == FAKE_ROOT_ID:
            continue
        if grp_of[src.id] != grp_of[tgt.id] and target[pos] != unrelated:
            crossing.append(pos)

    # the rebuilt datapack reuses the same labels, so the number of the
    # 'ROOT' label is looked up once and shared by both updates
    root = dpack.label_number('ROOT')

    def _relabel(labels):
        """Return a copy of `labels` with heads and links rewritten."""
        updated = np.copy(labels)
        updated[heads] = root
        updated[crossing] = unrelated
        return updated

    pack_target = _relabel(dpack.target)
    # WIP ctarget: currently a plain shallow copy
    # FIXME replace each ctgt with the list of intra-sentential
    # RST (sub)trees
    pack_ctarget = dict(dpack.ctarget.items())
    # end WIP ctarget
    intra_dpack = DataPack(edus=dpack.edus,
                           pairings=dpack.pairings,
                           data=dpack.data,
                           target=pack_target,
                           ctarget=pack_ctarget,
                           labels=dpack.labels,
                           vocab=dpack.vocab,
                           graph=dpack.graph)
    return intra_dpack, _relabel(target)